aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/llite
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-09 10:32:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-09 10:32:39 -0700
commiteafdca4d7010a0e019aaaace3dd71b432a69b54c (patch)
tree0206168276ece10426dbbef7b3de7e8d84c8674d /drivers/staging/lustre/lustre/llite
parentMerge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm (diff)
parentstaging: ipx: delete it from the tree (diff)
downloadlinux-dev-eafdca4d7010a0e019aaaace3dd71b432a69b54c.tar.xz
linux-dev-eafdca4d7010a0e019aaaace3dd71b432a69b54c.zip
Merge tag 'staging-4.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging
Pull staging/IIO updates from Greg KH: "Here is the big staging and IIO driver update for 4.18-rc1. It was delayed as I wanted to make sure the final driver deletions did not cause any major merge issues, and all now looks good. There are a lot of patches here, just over 1000. The diffstat summary shows the major changes here: 1007 files changed, 16828 insertions(+), 227770 deletions(-) Because of this, we might be close to shrinking the overall kernel source code size for two releases in a row. There was loads of work in this release cycle, primarily: - tons of ks7010 driver cleanups - lots of mt7621 driver fixes and cleanups - most driver cleanups - wilc1000 fixes and cleanups - lots and lots of IIO driver cleanups and new additions - debugfs cleanups for all staging drivers - lots of other staging driver cleanups and fixes, the shortlog has the full details. but the big user-visable things here are the removal of 3 chunks of code: - ncpfs and ipx were removed on schedule, no one has cared about this code since it moved to staging last year, and if it needs to come back, it can be reverted. - lustre file system is removed. I've ranted at the lustre developers about once a year for the past 5 years, with no real forward progress at all to clean things up and get the code into the "real" part of the kernel. Given that the lustre developers continue to work on an external tree and try to port those changes to the in-kernel tree every once in a while, this whole thing really really is not working out at all. So I'm deleting it so that the developers can spend the time working in their out-of-tree location and get things cleaned up properly to get merged into the tree correctly at a later date. Because of these file removals, you will have merge issues on some of these files (2 in the ipx code, 1 in the ncpfs code, and 1 in the atomisp driver). Just delete those files, it's a simple merge :) All of this has been in linux-next for a while with no reported problems" * tag 'staging-4.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging: (1011 commits) staging: ipx: delete it from the tree ncpfs: remove uapi .h files ncpfs: remove Documentation ncpfs: remove compat functionality staging: ncpfs: delete it staging: lustre: delete the filesystem from the tree. staging: vc04_services: no need to save the log debufs dentries staging: vc04_services: vchiq_debugfs_log_entry can be a void * staging: vc04_services: remove struct vchiq_debugfs_info staging: vc04_services: move client dbg directory into static variable staging: vc04_services: remove odd vchiq_debugfs_top() wrapper staging: vc04_services: no need to check debugfs return values staging: mt7621-gpio: reorder includes alphabetically staging: mt7621-gpio: change gc_map to don't use pointers staging: mt7621-gpio: use GPIOF_DIR_OUT and GPIOF_DIR_IN macros instead of custom values staging: mt7621-gpio: change 'to_mediatek_gpio' to make just a one line return staging: mt7621-gpio: dt-bindings: update documentation for #interrupt-cells property staging: mt7621-gpio: update #interrupt-cells for the gpio node staging: mt7621-gpio: dt-bindings: complete documentation for the gpio staging: mt7621-dts: add missing properties to gpio node ...
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
-rw-r--r--drivers/staging/lustre/lustre/llite/Makefile11
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c300
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c1706
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c3600
-rw-r--r--drivers/staging/lustre/lustre/llite/glimpse.c206
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_cl.c293
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_misc.c186
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h1337
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c2666
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c478
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c375
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c1684
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c1202
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.c240
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.h83
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c1214
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c641
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c1577
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c185
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c159
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c659
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h321
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c1374
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_lock.c87
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_object.c305
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c523
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c638
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_cache.c523
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_security.c96
29 files changed, 0 insertions, 22669 deletions
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
deleted file mode 100644
index 519fd747e3ad..000000000000
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += lustre.o
-lustre-y := dcache.o dir.o file.o llite_lib.o llite_nfs.o \
- rw.o rw26.o namei.o symlink.o llite_mmap.o range_lock.o \
- xattr.o xattr_cache.o xattr_security.o \
- super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \
- vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \
- lproc_llite.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
deleted file mode 100644
index 11b82c639bfe..000000000000
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/quotaops.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_dlm.h>
-
-#include "llite_internal.h"
-
-static void free_dentry_data(struct rcu_head *head)
-{
- struct ll_dentry_data *lld;
-
- lld = container_of(head, struct ll_dentry_data, lld_rcu_head);
- kfree(lld);
-}
-
-/* should NOT be called with the dcache lock, see fs/dcache.c */
-static void ll_release(struct dentry *de)
-{
- struct ll_dentry_data *lld;
-
- LASSERT(de);
- lld = ll_d2d(de);
- if (lld->lld_it) {
- ll_intent_release(lld->lld_it);
- kfree(lld->lld_it);
- }
-
- de->d_fsdata = NULL;
- call_rcu(&lld->lld_rcu_head, free_dentry_data);
-}
-
-/* Compare if two dentries are the same. Don't match if the existing dentry
- * is marked invalid. Returns 1 if different, 0 if the same.
- *
- * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
- * an AST before calling d_revalidate_it(). The dentry still exists (marked
- * INVALID) so d_lookup() matches it, but we have no lock on it (so
- * lock_match() fails) and we spin around real_lookup().
- *
- * This race doesn't apply to lookups in d_alloc_parallel(), and for
- * those we want to ensure that only one dentry with a given name is
- * in ll_lookup_nd() at a time. So allow invalid dentries to match
- * while d_in_lookup(). We will be called again when the lookup
- * completes, and can give a different answer then.
- */
-static int ll_dcompare(const struct dentry *dentry,
- unsigned int len, const char *str,
- const struct qstr *name)
-{
- if (len != name->len)
- return 1;
-
- if (memcmp(str, name->name, len))
- return 1;
-
- CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n",
- name->len, name->name, dentry, dentry->d_flags,
- d_count(dentry));
-
- /* mountpoint is always valid */
- if (d_mountpoint(dentry))
- return 0;
-
- /* ensure exclusion against parallel lookup of the same name */
- if (d_in_lookup((struct dentry *)dentry))
- return 0;
-
- if (d_lustre_invalid(dentry))
- return 1;
-
- return 0;
-}
-
-/**
- * Called when last reference to a dentry is dropped and dcache wants to know
- * whether or not it should cache it:
- * - return 1 to delete the dentry immediately
- * - return 0 to cache the dentry
- * Should NOT be called with the dcache lock, see fs/dcache.c
- */
-static int ll_ddelete(const struct dentry *de)
-{
- LASSERT(de);
-
- CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
- d_lustre_invalid(de) ? "deleting" : "keeping",
- de, de, de->d_parent, d_inode(de),
- d_unhashed(de) ? "" : "hashed,",
- list_empty(&de->d_subdirs) ? "" : "subdirs");
-
- /* kernel >= 2.6.38 last refcount is decreased after this function. */
- LASSERT(d_count(de) == 1);
-
- if (d_lustre_invalid(de))
- return 1;
- return 0;
-}
-
-static int ll_d_init(struct dentry *de)
-{
- struct ll_dentry_data *lld = kzalloc(sizeof(*lld), GFP_KERNEL);
-
- if (unlikely(!lld))
- return -ENOMEM;
- lld->lld_invalid = 1;
- de->d_fsdata = lld;
- return 0;
-}
-
-void ll_intent_drop_lock(struct lookup_intent *it)
-{
- if (it->it_op && it->it_lock_mode) {
- struct lustre_handle handle;
-
- handle.cookie = it->it_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "releasing lock with cookie %#llx from it %p\n",
- handle.cookie, it);
- ldlm_lock_decref(&handle, it->it_lock_mode);
-
- /* bug 494: intent_release may be called multiple times, from
- * this thread and we don't want to double-decref this lock
- */
- it->it_lock_mode = 0;
- if (it->it_remote_lock_mode != 0) {
- handle.cookie = it->it_remote_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "releasing remote lock with cookie%#llx from it %p\n",
- handle.cookie, it);
- ldlm_lock_decref(&handle,
- it->it_remote_lock_mode);
- it->it_remote_lock_mode = 0;
- }
- }
-}
-
-void ll_intent_release(struct lookup_intent *it)
-{
- CDEBUG(D_INFO, "intent %p released\n", it);
- ll_intent_drop_lock(it);
- /* We are still holding extra reference on a request, need to free it */
- if (it_disposition(it, DISP_ENQ_OPEN_REF))
- ptlrpc_req_finished(it->it_request); /* ll_file_open */
-
- if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
- ptlrpc_req_finished(it->it_request);
-
- it->it_disposition = 0;
- it->it_request = NULL;
-}
-
-void ll_invalidate_aliases(struct inode *inode)
-{
- struct dentry *dentry;
-
- CDEBUG(D_INODE, "marking dentries for ino " DFID "(%p) invalid\n",
- PFID(ll_inode2fid(inode)), inode);
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
- CDEBUG(D_DENTRY,
- "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
- dentry, dentry, dentry->d_parent,
- d_inode(dentry), dentry->d_flags);
-
- d_lustre_invalidate(dentry, 0);
- }
- spin_unlock(&inode->i_lock);
-}
-
-int ll_revalidate_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it,
- struct inode *inode)
-{
- int rc = 0;
-
- if (!request)
- return 0;
-
- if (it_disposition(it, DISP_LOOKUP_NEG))
- return -ENOENT;
-
- rc = ll_prep_inode(&inode, request, NULL, it);
-
- return rc;
-}
-
-void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
-{
- if (it->it_lock_mode && inode) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
- ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
- }
-
- /* drop lookup or getattr locks immediately */
- if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
- /* on 2.6 there are situation when several lookups and
- * revalidations may be requested during single operation.
- * therefore, we don't release intent here -bzzz
- */
- ll_intent_drop_lock(it);
- }
-}
-
-static int ll_revalidate_dentry(struct dentry *dentry,
- unsigned int lookup_flags)
-{
- struct inode *dir = d_inode(dentry->d_parent);
-
- /* If this is intermediate component path lookup and we were able to get
- * to this dentry, then its lock has not been revoked and the
- * path component is valid.
- */
- if (lookup_flags & LOOKUP_PARENT)
- return 1;
-
- /* Symlink - always valid as long as the dentry was found */
- if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
- return 1;
-
- /*
- * VFS warns us that this is the second go around and previous
- * operation failed (most likely open|creat), so this time
- * we better talk to the server via the lookup path by name,
- * not by fid.
- */
- if (lookup_flags & LOOKUP_REVAL)
- return 0;
-
- if (!dentry_may_statahead(dir, dentry))
- return 1;
-
- if (lookup_flags & LOOKUP_RCU)
- return -ECHILD;
-
- ll_statahead(dir, &dentry, !d_inode(dentry));
- return 1;
-}
-
-/*
- * Always trust cached dentries. Update statahead window if necessary.
- */
-static int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
-{
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, flags=%u\n",
- dentry, flags);
-
- return ll_revalidate_dentry(dentry, flags);
-}
-
-const struct dentry_operations ll_d_ops = {
- .d_init = ll_d_init,
- .d_revalidate = ll_revalidate_nd,
- .d_release = ll_release,
- .d_delete = ll_ddelete,
- .d_compare = ll_dcompare,
-};
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
deleted file mode 100644
index d10d27268323..000000000000
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ /dev/null
@@ -1,1706 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/dir.c
- *
- * Directory code for lustre client.
- */
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <linux/buffer_head.h> /* for wait_on_buffer */
-#include <linux/pagevec.h>
-#include <linux/prefetch.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_lib.h>
-#include <lustre_dlm.h>
-#include <lustre_fid.h>
-#include <lustre_kernelcomm.h>
-#include <lustre_swab.h>
-
-#include "llite_internal.h"
-
-/*
- * (new) readdir implementation overview.
- *
- * Original lustre readdir implementation cached exact copy of raw directory
- * pages on the client. These pages were indexed in client page cache by
- * logical offset in the directory file. This design, while very simple and
- * intuitive had some inherent problems:
- *
- * . it implies that byte offset to the directory entry serves as a
- * telldir(3)/seekdir(3) cookie, but that offset is not stable: in
- * ext3/htree directory entries may move due to splits, and more
- * importantly,
- *
- * . it is incompatible with the design of split directories for cmd3,
- * that assumes that names are distributed across nodes based on their
- * hash, and so readdir should be done in hash order.
- *
- * New readdir implementation does readdir in hash order, and uses hash of a
- * file name as a telldir/seekdir cookie. This led to number of complications:
- *
- * . hash is not unique, so it cannot be used to index cached directory
- * pages on the client (note, that it requires a whole pageful of hash
- * collided entries to cause two pages to have identical hashes);
- *
- * . hash is not unique, so it cannot, strictly speaking, be used as an
- * entry cookie. ext3/htree has the same problem and lustre implementation
- * mimics their solution: seekdir(hash) positions directory at the first
- * entry with the given hash.
- *
- * Client side.
- *
- * 0. caching
- *
- * Client caches directory pages using hash of the first entry as an index. As
- * noted above hash is not unique, so this solution doesn't work as is:
- * special processing is needed for "page hash chains" (i.e., sequences of
- * pages filled with entries all having the same hash value).
- *
- * First, such chains have to be detected. To this end, server returns to the
- * client the hash of the first entry on the page next to one returned. When
- * client detects that this hash is the same as hash of the first entry on the
- * returned page, page hash collision has to be handled. Pages in the
- * hash chain, except first one, are termed "overflow pages".
- *
- * Solution to index uniqueness problem is to not cache overflow
- * pages. Instead, when page hash collision is detected, all overflow pages
- * from emerging chain are immediately requested from the server and placed in
- * a special data structure (struct ll_dir_chain). This data structure is used
- * by ll_readdir() to process entries from overflow pages. When readdir
- * invocation finishes, overflow pages are discarded. If page hash collision
- * chain weren't completely processed, next call to readdir will again detect
- * page hash collision, again read overflow pages in, process next portion of
- * entries and again discard the pages. This is not as wasteful as it looks,
- * because, given reasonable hash, page hash collisions are extremely rare.
- *
- * 1. directory positioning
- *
- * When seekdir(hash) is called, original
- *
- *
- *
- *
- *
- *
- *
- *
- * Server.
- *
- * identification of and access to overflow pages
- *
- * page format
- *
- * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
- * a header lu_dirpage which describes the start/end hash, and whether this
- * page is empty (contains no dir entry) or hash collide with next page.
- * After client receives reply, several pages will be integrated into dir page
- * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the lu_dirpage
- * for this integrated page will be adjusted. See lmv_adjust_dirpages().
- *
- */
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
- __u64 offset)
-{
- struct md_callback cb_op;
- struct page *page;
- int rc;
-
- cb_op.md_blocking_ast = ll_md_blocking_ast;
- rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page);
- if (rc)
- return ERR_PTR(rc);
-
- return page;
-}
-
-void ll_release_page(struct inode *inode, struct page *page, bool remove)
-{
- kunmap(page);
-
- /*
- * Always remove the page for striped dir, because the page is
- * built from temporarily in LMV layer
- */
- if (inode && S_ISDIR(inode->i_mode) &&
- ll_i2info(inode)->lli_lsm_md) {
- __free_page(page);
- return;
- }
-
- if (remove) {
- lock_page(page);
- if (likely(page->mapping))
- truncate_complete_page(page->mapping, page);
- unlock_page(page);
- }
- put_page(page);
-}
-
-/**
- * return IF_* type for given lu_dirent entry.
- * IF_* flag shld be converted to particular OS file type in
- * platform llite module.
- */
-static __u16 ll_dirent_type_get(struct lu_dirent *ent)
-{
- __u16 type = 0;
- struct luda_type *lt;
- int len = 0;
-
- if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
- const unsigned int align = sizeof(struct luda_type) - 1;
-
- len = le16_to_cpu(ent->lde_namelen);
- len = (len + align) & ~align;
- lt = (void *)ent->lde_name + len;
- type = IFTODT(le16_to_cpu(lt->lt_type));
- }
- return type;
-}
-
-int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
- struct dir_context *ctx)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = *ppos;
- int is_api32 = ll_need_32bit_api(sbi);
- int is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
- struct page *page;
- bool done = false;
- int rc = 0;
-
- page = ll_get_dir_page(inode, op_data, pos);
-
- while (rc == 0 && !done) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
- __u64 hash;
- __u64 next;
-
- if (IS_ERR(page)) {
- rc = PTR_ERR(page);
- break;
- }
-
- hash = MDS_DIR_END_OFF;
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent && !done;
- ent = lu_dirent_next(ent)) {
- __u16 type;
- int namelen;
- struct lu_fid fid;
- __u64 lhash;
- __u64 ino;
-
- hash = le64_to_cpu(ent->lde_hash);
- if (hash < pos)
- /*
- * Skip until we find target hash
- * value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (namelen == 0)
- /*
- * Skip dummy record.
- */
- continue;
-
- if (is_api32 && is_hash64)
- lhash = hash >> 32;
- else
- lhash = hash;
- fid_le_to_cpu(&fid, &ent->lde_fid);
- ino = cl_fid_build_ino(&fid, is_api32);
- type = ll_dirent_type_get(ent);
- ctx->pos = lhash;
- /* For 'll_nfs_get_name_filldir()', it will try
- * to access the 'ent' through its 'lde_name',
- * so the parameter 'name' for 'ctx->actor()'
- * must be part of the 'ent'.
- */
- done = !dir_emit(ctx, ent->lde_name,
- namelen, ino, type);
- }
-
- if (done) {
- pos = hash;
- ll_release_page(inode, page, false);
- break;
- }
-
- next = le64_to_cpu(dp->ldp_hash_end);
- pos = next;
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- done = 1;
- ll_release_page(inode, page, false);
- } else {
- /*
- * Normal case: continue to the next
- * page.
- */
- ll_release_page(inode, page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- next = pos;
- page = ll_get_dir_page(inode, op_data, pos);
- }
- }
-
- ctx->pos = pos;
- return rc;
-}
-
-static int ll_readdir(struct file *filp, struct dir_context *ctx)
-{
- struct inode *inode = file_inode(filp);
- struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = lfd ? lfd->lfd_pos : 0;
- int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
- int api32 = ll_need_32bit_api(sbi);
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:inode=" DFID "(%p) pos/size %lu/%llu 32bit_api %d\n",
- PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
- i_size_read(inode), api32);
-
- if (pos == MDS_DIR_END_OFF) {
- /*
- * end-of-file.
- */
- rc = 0;
- goto out;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, inode);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- if (unlikely(op_data->op_mea1)) {
- /*
- * This is only needed for striped dir to fill ..,
- * see lmv_read_page
- */
- if (file_dentry(filp)->d_parent &&
- file_dentry(filp)->d_parent->d_inode) {
- __u64 ibits = MDS_INODELOCK_UPDATE;
- struct inode *parent;
-
- parent = file_dentry(filp)->d_parent->d_inode;
- if (ll_have_md_lock(parent, &ibits, LCK_MINMODE))
- op_data->op_fid3 = *ll_inode2fid(parent);
- }
-
- /*
- * If it can not find in cache, do lookup .. on the master
- * object
- */
- if (fid_is_zero(&op_data->op_fid3)) {
- rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3);
- if (rc) {
- ll_finish_md_op_data(op_data);
- return rc;
- }
- }
- }
- op_data->op_max_pages = sbi->ll_md_brw_pages;
- ctx->pos = pos;
- rc = ll_dir_read(inode, &pos, op_data, ctx);
- pos = ctx->pos;
- if (lfd)
- lfd->lfd_pos = pos;
-
- if (pos == MDS_DIR_END_OFF) {
- if (api32)
- pos = LL_DIR_END_OFF_32BIT;
- else
- pos = LL_DIR_END_OFF;
- } else {
- if (api32 && hash64)
- pos >>= 32;
- }
- ctx->pos = pos;
- ll_finish_md_op_data(op_data);
-out:
- if (!rc)
- ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);
-
- return rc;
-}
-
-static int ll_send_mgc_param(struct obd_export *mgc, char *string)
-{
- struct mgs_send_param *msp;
- int rc = 0;
-
- msp = kzalloc(sizeof(*msp), GFP_NOFS);
- if (!msp)
- return -ENOMEM;
-
- strlcpy(msp->mgs_param, string, sizeof(msp->mgs_param));
- rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
- sizeof(struct mgs_send_param), msp, NULL);
- if (rc)
- CERROR("Failed to set parameter: %d\n", rc);
- kfree(msp);
-
- return rc;
-}
-
-/**
- * Create striped directory with specified stripe(@lump)
- *
- * param[in] parent the parent of the directory.
- * param[in] lump the specified stripes.
- * param[in] dirname the name of the directory.
- * param[in] mode the specified mode of the directory.
- *
- * retval =0 if striped directory is being created successfully.
- * <0 if the creation is failed.
- */
-static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
- const char *dirname, umode_t mode)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- struct ll_sb_info *sbi = ll_i2sbi(parent);
- struct inode *inode = NULL;
- struct dentry dentry;
- int err;
-
- if (unlikely(lump->lum_magic != LMV_USER_MAGIC))
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p) name %s stripe_offset %d, stripe_count: %u\n",
- PFID(ll_inode2fid(parent)), parent, dirname,
- (int)lump->lum_stripe_offset, lump->lum_stripe_count);
-
- if (lump->lum_stripe_count > 1 &&
- !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE))
- return -EINVAL;
-
- if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC))
- lustre_swab_lmv_user_md(lump);
-
- if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
- mode &= ~current_umask();
- mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
- op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname,
- strlen(dirname), mode, LUSTRE_OPC_MKDIR,
- lump);
- if (IS_ERR(op_data)) {
- err = PTR_ERR(op_data);
- goto err_exit;
- }
-
- op_data->op_cli_flags |= CLI_SET_MEA;
- err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode,
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, current_fsgid()),
- cfs_curproc_cap_pack(), 0, &request);
- ll_finish_md_op_data(op_data);
-
- err = ll_prep_inode(&inode, request, parent->i_sb, NULL);
- if (err)
- goto err_exit;
-
- memset(&dentry, 0, sizeof(dentry));
- dentry.d_inode = inode;
-
- err = ll_init_security(&dentry, inode, parent);
- iput(inode);
-
-err_exit:
- ptlrpc_req_finished(request);
- return err;
-}
-
-int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
- int set_default)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int rc = 0;
- struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
- struct obd_device *mgc = lsi->lsi_mgc;
- int lum_size;
-
- if (lump) {
- /*
- * This is coming from userspace, so should be in
- * local endian. But the MDS would like it in little
- * endian, so we swab it before we send it.
- */
- switch (lump->lmm_magic) {
- case LOV_USER_MAGIC_V1: {
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
- lustre_swab_lov_user_md_v1(lump);
- lum_size = sizeof(struct lov_user_md_v1);
- break;
- }
- case LOV_USER_MAGIC_V3: {
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
- lustre_swab_lov_user_md_v3(
- (struct lov_user_md_v3 *)lump);
- lum_size = sizeof(struct lov_user_md_v3);
- break;
- }
- case LMV_USER_MAGIC: {
- if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC))
- lustre_swab_lmv_user_md(
- (struct lmv_user_md *)lump);
- lum_size = sizeof(struct lmv_user_md);
- break;
- }
- default: {
- CDEBUG(D_IOCTL,
- "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
- lump->lmm_magic, LOV_USER_MAGIC_V1,
- LOV_USER_MAGIC_V3);
- return -EINVAL;
- }
- }
- } else {
- lum_size = sizeof(struct lov_user_md_v1);
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc)
- return rc;
-
-#if OBD_OCD_VERSION(2, 13, 53, 0) > LUSTRE_VERSION_CODE
- /*
- * 2.9 server has stored filesystem default stripe in ROOT xattr,
- * and it's stored into system config for backward compatibility.
- *
- * In the following we use the fact that LOV_USER_MAGIC_V1 and
- * LOV_USER_MAGIC_V3 have the same initial fields so we do not
- * need to make the distinction between the 2 versions
- */
- if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
- char *param = NULL;
- char *buf;
-
- param = kzalloc(MGS_PARAM_MAXLEN, GFP_NOFS);
- if (!param)
- return -ENOMEM;
-
- buf = param;
- /* Get fsname and assume devname to be -MDT0000. */
- ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN);
- strcat(buf, "-MDT0000.lov");
- buf += strlen(buf);
-
- /* Set root stripesize */
- sprintf(buf, ".stripesize=%u",
- lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
- if (rc)
- goto end;
-
- /* Set root stripecount */
- sprintf(buf, ".stripecount=%hd",
- lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
- if (rc)
- goto end;
-
- /* Set root stripeoffset */
- sprintf(buf, ".stripeoffset=%hd",
- lump ? le16_to_cpu(lump->lmm_stripe_offset) :
- (typeof(lump->lmm_stripe_offset))(-1));
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
-
-end:
- kfree(param);
- }
-#endif
- return rc;
-}
-
-/**
- * This function will be used to get default LOV/LMV/Default LMV
- * @valid will be used to indicate which stripe it will retrieve
- * OBD_MD_MEA LMV stripe EA
- * OBD_MD_DEFAULT_MEA Default LMV stripe EA
- * otherwise Default LOV EA.
- * Each time, it can only retrieve 1 stripe EA
- **/
-int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size,
- struct ptlrpc_request **request, u64 valid)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdt_body *body;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *req = NULL;
- int rc, lmmsize;
- struct md_op_data *op_data;
-
- rc = ll_get_max_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, lmmsize, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr failed on inode " DFID ": rc %d\n",
- PFID(ll_inode2fid(inode)), rc);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- lmmsize = body->mbo_eadatasize;
-
- if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
- lmmsize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill,
- &RMF_MDT_MD, lmmsize);
- LASSERT(lmm);
-
- /*
- * This is coming from the MDS, so is probably in
- * little endian. We convert it to host endian before
- * passing it to userspace.
- */
- /* We don't swab objects for directories */
- switch (le32_to_cpu(lmm->lmm_magic)) {
- case LOV_MAGIC_V1:
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- break;
- case LOV_MAGIC_V3:
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- break;
- case LMV_MAGIC_V1:
- if (cpu_to_le32(LMV_MAGIC) != LMV_MAGIC)
- lustre_swab_lmv_mds_md((union lmv_mds_md *)lmm);
- break;
- case LMV_USER_MAGIC:
- if (cpu_to_le32(LMV_USER_MAGIC) != LMV_USER_MAGIC)
- lustre_swab_lmv_user_md((struct lmv_user_md *)lmm);
- break;
- default:
- CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
- rc = -EPROTO;
- }
-out:
- *plmm = lmm;
- *plmm_size = lmmsize;
- *request = req;
- return rc;
-}
-
-int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid)
-{
- struct md_op_data *op_data;
- int mdt_index, rc;
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return -ENOMEM;
-
- op_data->op_flags |= MF_GET_MDT_IDX;
- op_data->op_fid1 = *fid;
- rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
- mdt_index = op_data->op_mds;
- kvfree(op_data);
- if (rc < 0)
- return rc;
-
- return mdt_index;
-}
-
-/*
- * Get MDT index for the inode.
- */
-int ll_get_mdt_idx(struct inode *inode)
-{
- return ll_get_mdt_idx_by_fid(ll_i2sbi(inode), ll_inode2fid(inode));
-}
-
-/**
- * Generic handler to do any pre-copy work.
- *
- * It sends a first hsm_progress (with extent length == 0) to coordinator as a
- * first information for it that real work has started.
- *
- * Moreover, for a ARCHIVE request, it will sample the file data version and
- * store it in \a copy.
- *
- * \return 0 on success.
- */
-static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct hsm_progress_kernel hpk;
- int rc2, rc = 0;
-
- /* Forge a hsm_progress based on data from copy. */
- hpk.hpk_fid = copy->hc_hai.hai_fid;
- hpk.hpk_cookie = copy->hc_hai.hai_cookie;
- hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset;
- hpk.hpk_extent.length = 0;
- hpk.hpk_flags = 0;
- hpk.hpk_errval = 0;
- hpk.hpk_data_version = 0;
-
- /* For archive request, we need to read the current file version. */
- if (copy->hc_hai.hai_action == HSMA_ARCHIVE) {
- struct inode *inode;
- __u64 data_version = 0;
-
- /* Get inode for this fid */
- inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
- if (IS_ERR(inode)) {
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval is >= 0 */
- hpk.hpk_errval = -PTR_ERR(inode);
- rc = PTR_ERR(inode);
- goto progress;
- }
-
- /* Read current file data version */
- rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
- iput(inode);
- if (rc != 0) {
- CDEBUG(D_HSM,
- "Could not read file data version of " DFID " (rc = %d). Archive request (%#llx) could not be done.\n",
- PFID(&copy->hc_hai.hai_fid), rc,
- copy->hc_hai.hai_cookie);
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -rc;
- goto progress;
- }
-
- /* Store in the hsm_copy for later copytool use.
- * Always modified even if no lsm.
- */
- copy->hc_data_version = data_version;
- }
-
-progress:
- /* On error, the request should be considered as completed */
- if (hpk.hpk_errval > 0)
- hpk.hpk_flags |= HP_FLAG_COMPLETED;
- rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
-
- return rc ? rc : rc2;
-}
-
-/**
- * Generic handler to do any post-copy work.
- *
- * It will send the last hsm_progress update to coordinator to inform it
- * that copy is finished and whether it was successful or not.
- *
- * Moreover,
- * - for ARCHIVE request, it will sample the file data version and compare it
- * with the version saved in ll_ioc_copy_start(). If they do not match, copy
- * will be considered as failed.
- * - for RESTORE request, it will sample the file data version and send it to
- * coordinator which is useful if the file was imported as 'released'.
- *
- * \return 0 on success.
- */
-static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct hsm_progress_kernel hpk;
- int rc2, rc = 0;
-
- /* If you modify the logic here, also check llapi_hsm_copy_end(). */
- /* Take care: copy->hc_hai.hai_action, len, gid and data are not
- * initialized if copy_end was called with copy == NULL.
- */
-
- /* Forge a hsm_progress based on data from copy. */
- hpk.hpk_fid = copy->hc_hai.hai_fid;
- hpk.hpk_cookie = copy->hc_hai.hai_cookie;
- hpk.hpk_extent = copy->hc_hai.hai_extent;
- hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED;
- hpk.hpk_errval = copy->hc_errval;
- hpk.hpk_data_version = 0;
-
- /* For archive request, we need to check the file data was not changed.
- *
- * For restore request, we need to send the file data version, this is
- * useful when the file was created using hsm_import.
- */
- if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) ||
- (copy->hc_hai.hai_action == HSMA_RESTORE)) &&
- (copy->hc_errval == 0)) {
- struct inode *inode;
- __u64 data_version = 0;
-
- /* Get lsm for this fid */
- inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
- if (IS_ERR(inode)) {
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -PTR_ERR(inode);
- rc = PTR_ERR(inode);
- goto progress;
- }
-
- rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
- iput(inode);
- if (rc) {
- CDEBUG(D_HSM,
- "Could not read file data version. Request could not be confirmed.\n");
- if (hpk.hpk_errval == 0)
- hpk.hpk_errval = -rc;
- goto progress;
- }
-
- /* Store in the hsm_copy for later copytool use.
- * Always modified even if no lsm.
- */
- hpk.hpk_data_version = data_version;
-
- /* File could have been stripped during archiving, so we need
- * to check anyway.
- */
- if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
- (copy->hc_data_version != data_version)) {
- CDEBUG(D_HSM, "File data version mismatched. File content was changed during archiving. " DFID ", start:%#llx current:%#llx\n",
- PFID(&copy->hc_hai.hai_fid),
- copy->hc_data_version, data_version);
- /* File was changed, send error to cdt. Do not ask for
- * retry because if a file is modified frequently,
- * the cdt will loop on retried archive requests.
- * The policy engine will ask for a new archive later
- * when the file will not be modified for some tunable
- * time
- */
- hpk.hpk_flags &= ~HP_FLAG_RETRY;
- rc = -EBUSY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -rc;
- }
- }
-
-progress:
- rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
-
- return rc ? rc : rc2;
-}
-
-static int copy_and_ioctl(int cmd, struct obd_export *exp,
- const void __user *data, size_t size)
-{
- void *copy;
- int rc;
-
- copy = memdup_user(data, size);
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = obd_iocontrol(cmd, exp, size, copy, NULL);
- kfree(copy);
-
- return rc;
-}
-
-static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
-{
- int cmd = qctl->qc_cmd;
- int type = qctl->qc_type;
- int id = qctl->qc_id;
- int valid = qctl->qc_valid;
- int rc = 0;
-
- switch (cmd) {
- case Q_SETQUOTA:
- case Q_SETINFO:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case Q_GETQUOTA:
- if (((type == USRQUOTA &&
- !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
- (type == GRPQUOTA &&
- !in_egroup_p(make_kgid(&init_user_ns, id)))) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case Q_GETINFO:
- break;
- default:
- CERROR("unsupported quotactl op: %#x\n", cmd);
- return -ENOTTY;
- }
-
- if (valid != QC_GENERAL) {
- if (cmd == Q_GETINFO)
- qctl->qc_cmd = Q_GETOINFO;
- else if (cmd == Q_GETQUOTA)
- qctl->qc_cmd = Q_GETOQUOTA;
- else
- return -EINVAL;
-
- switch (valid) {
- case QC_MDTIDX:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- case QC_OSTIDX:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- case QC_UUID:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
- sizeof(*qctl), qctl, NULL);
- if (rc == -EAGAIN)
- rc = obd_iocontrol(OBD_IOC_QUOTACTL,
- sbi->ll_dt_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc)
- return rc;
-
- qctl->qc_cmd = cmd;
- } else {
- struct obd_quotactl *oqctl;
-
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl)
- return -ENOMEM;
-
- QCTL_COPY(oqctl, qctl);
- rc = obd_quotactl(sbi->ll_md_exp, oqctl);
- if (rc) {
- kfree(oqctl);
- return rc;
- }
- /* If QIF_SPACE is not set, client should collect the
- * space usage from OSSs by itself
- */
- if (cmd == Q_GETQUOTA &&
- !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
- !oqctl->qc_dqblk.dqb_curspace) {
- struct obd_quotactl *oqctl_tmp;
-
- oqctl_tmp = kzalloc(sizeof(*oqctl_tmp), GFP_NOFS);
- if (!oqctl_tmp) {
- rc = -ENOMEM;
- goto out;
- }
-
- oqctl_tmp->qc_cmd = Q_GETOQUOTA;
- oqctl_tmp->qc_id = oqctl->qc_id;
- oqctl_tmp->qc_type = oqctl->qc_type;
-
- /* collect space usage from OSTs */
- oqctl_tmp->qc_dqblk.dqb_curspace = 0;
- rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp);
- if (!rc || rc == -EREMOTEIO) {
- oqctl->qc_dqblk.dqb_curspace =
- oqctl_tmp->qc_dqblk.dqb_curspace;
- oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
- }
-
- /* collect space & inode usage from MDTs */
- oqctl_tmp->qc_dqblk.dqb_curspace = 0;
- oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
- rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp);
- if (!rc || rc == -EREMOTEIO) {
- oqctl->qc_dqblk.dqb_curspace +=
- oqctl_tmp->qc_dqblk.dqb_curspace;
- oqctl->qc_dqblk.dqb_curinodes =
- oqctl_tmp->qc_dqblk.dqb_curinodes;
- oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
- } else {
- oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
- }
-
- kfree(oqctl_tmp);
- }
-out:
- QCTL_COPY(qctl, oqctl);
- kfree(oqctl);
- }
-
- return rc;
-}
-
-/* This function tries to get a single name component,
- * to send to the server. No actual path traversal involved,
- * so we limit to NAME_MAX
- */
-static char *ll_getname(const char __user *filename)
-{
- int ret = 0, len;
- char *tmp;
-
- tmp = kzalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!tmp)
- return ERR_PTR(-ENOMEM);
-
- len = strncpy_from_user(tmp, filename, NAME_MAX + 1);
- if (len < 0)
- ret = len;
- else if (len == 0)
- ret = -ENOENT;
- else if (len > NAME_MAX && tmp[NAME_MAX] != 0)
- ret = -ENAMETOOLONG;
-
- if (ret) {
- kfree(tmp);
- tmp = ERR_PTR(ret);
- }
- return tmp;
-}
-
-#define ll_putname(filename) kfree(filename)
-
-static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = file_inode(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_ioctl_data *data;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), cmd=%#x\n",
- PFID(ll_inode2fid(inode)), inode, cmd);
-
- /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
- if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
- return -ENOTTY;
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
- switch (cmd) {
- case FSFILT_IOC_GETFLAGS:
- case FSFILT_IOC_SETFLAGS:
- return ll_iocontrol(inode, file, cmd, arg);
- case FSFILT_IOC_GETVERSION_OLD:
- case FSFILT_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
- /* We need to special case any other ioctls we want to handle,
- * to send them to the MDS/OST as appropriate and to properly
- * network encode the arg field.
- case FSFILT_IOC_SETVERSION_OLD:
- case FSFILT_IOC_SETVERSION:
- */
- case LL_IOC_GET_MDTIDX: {
- int mdtidx;
-
- mdtidx = ll_get_mdt_idx(inode);
- if (mdtidx < 0)
- return mdtidx;
-
- if (put_user((int)mdtidx, (int __user *)arg))
- return -EFAULT;
-
- return 0;
- }
- case IOC_MDC_LOOKUP: {
- int namelen, len = 0;
- char *buf = NULL;
- char *filename;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc)
- return rc;
- data = (void *)buf;
-
- filename = data->ioc_inlbuf1;
- namelen = strlen(filename);
-
- if (namelen < 1) {
- CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
- rc = -EINVAL;
- goto out_free;
- }
-
- rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);
- if (rc < 0) {
- CERROR("%s: lookup %.*s failed: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), namelen,
- filename, rc);
- goto out_free;
- }
-out_free:
- kvfree(buf);
- return rc;
- }
- case LL_IOC_LMV_SETSTRIPE: {
- struct lmv_user_md *lum;
- char *buf = NULL;
- char *filename;
- int namelen = 0;
- int lumlen = 0;
- umode_t mode;
- int len;
- int rc;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc)
- return rc;
-
- data = (void *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) {
- rc = -EINVAL;
- goto lmv_out_free;
- }
-
- filename = data->ioc_inlbuf1;
- namelen = data->ioc_inllen1;
-
- if (namelen < 1) {
- CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
- rc = -EINVAL;
- goto lmv_out_free;
- }
- lum = (struct lmv_user_md *)data->ioc_inlbuf2;
- lumlen = data->ioc_inllen2;
-
- if (lum->lum_magic != LMV_USER_MAGIC ||
- lumlen != sizeof(*lum)) {
- CERROR("%s: wrong lum magic %x or size %d: rc = %d\n",
- filename, lum->lum_magic, lumlen, -EFAULT);
- rc = -EINVAL;
- goto lmv_out_free;
- }
-
-#if OBD_OCD_VERSION(2, 9, 50, 0) > LUSTRE_VERSION_CODE
- mode = data->ioc_type != 0 ? data->ioc_type : 0777;
-#else
- mode = data->ioc_type;
-#endif
- rc = ll_dir_setdirstripe(inode, lum, filename, mode);
-lmv_out_free:
- kvfree(buf);
- return rc;
- }
- case LL_IOC_LMV_SET_DEFAULT_STRIPE: {
- struct lmv_user_md __user *ulump;
- struct lmv_user_md lum;
- int rc;
-
- ulump = (struct lmv_user_md __user *)arg;
- if (copy_from_user(&lum, ulump, sizeof(lum)))
- return -EFAULT;
-
- if (lum.lum_magic != LMV_USER_MAGIC)
- return -EINVAL;
-
- rc = ll_dir_setstripe(inode, (struct lov_user_md *)&lum, 0);
-
- return rc;
- }
- case LL_IOC_LOV_SETSTRIPE: {
- struct lov_user_md_v3 lumv3;
- struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
- struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
- struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
-
- int set_default = 0;
-
- LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
- LASSERT(sizeof(lumv3.lmm_objects[0]) ==
- sizeof(lumv3p->lmm_objects[0]));
- /* first try with v1 which is smaller than v3 */
- if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
- return -EFAULT;
-
- if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
- if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
- return -EFAULT;
- }
-
- if (is_root_inode(inode))
- set_default = 1;
-
- /* in v1 and v3 cases lumv1 points to data */
- rc = ll_dir_setstripe(inode, lumv1, set_default);
-
- return rc;
- }
- case LL_IOC_LMV_GETSTRIPE: {
- struct lmv_user_md __user *ulmv;
- struct lmv_user_md lum;
- struct ptlrpc_request *request = NULL;
- struct lmv_user_md *tmp = NULL;
- union lmv_mds_md *lmm = NULL;
- u64 valid = 0;
- int max_stripe_count;
- int stripe_count;
- int mdt_index;
- int lum_size;
- int lmmsize;
- int rc;
- int i;
-
- ulmv = (struct lmv_user_md __user *)arg;
- if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
- return -EFAULT;
-
- max_stripe_count = lum.lum_stripe_count;
- /*
- * lum_magic will indicate which stripe the ioctl will like
- * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC
- * is for default LMV stripe
- */
- if (lum.lum_magic == LMV_MAGIC_V1)
- valid |= OBD_MD_MEA;
- else if (lum.lum_magic == LMV_USER_MAGIC)
- valid |= OBD_MD_DEFAULT_MEA;
- else
- return -EINVAL;
-
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request,
- valid);
- if (rc)
- goto finish_req;
-
- /* Get default LMV EA */
- if (lum.lum_magic == LMV_USER_MAGIC) {
- if (lmmsize > sizeof(*ulmv)) {
- rc = -EINVAL;
- goto finish_req;
- }
-
- if (copy_to_user(ulmv, lmm, lmmsize))
- rc = -EFAULT;
-
- goto finish_req;
- }
-
- stripe_count = lmv_mds_md_stripe_count_get(lmm);
- if (max_stripe_count < stripe_count) {
- lum.lum_stripe_count = stripe_count;
- if (copy_to_user(ulmv, &lum, sizeof(lum))) {
- rc = -EFAULT;
- goto finish_req;
- }
- rc = -E2BIG;
- goto finish_req;
- }
-
- lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1);
- tmp = kzalloc(lum_size, GFP_NOFS);
- if (!tmp) {
- rc = -ENOMEM;
- goto finish_req;
- }
-
- mdt_index = ll_get_mdt_idx(inode);
- if (mdt_index < 0) {
- rc = -ENOMEM;
- goto out_tmp;
- }
- tmp->lum_magic = LMV_MAGIC_V1;
- tmp->lum_stripe_count = 0;
- tmp->lum_stripe_offset = mdt_index;
- for (i = 0; i < stripe_count; i++) {
- struct lu_fid fid;
-
- fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]);
- mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid);
- if (mdt_index < 0) {
- rc = mdt_index;
- goto out_tmp;
- }
- tmp->lum_objects[i].lum_mds = mdt_index;
- tmp->lum_objects[i].lum_fid = fid;
- tmp->lum_stripe_count++;
- }
-
- if (copy_to_user(ulmv, tmp, lum_size)) {
- rc = -EFAULT;
- goto out_tmp;
- }
-out_tmp:
- kfree(tmp);
-finish_req:
- ptlrpc_req_finished(request);
- return rc;
- }
-
- case LL_IOC_LOV_SWAP_LAYOUTS:
- return -EPERM;
- case IOC_OBD_STATFS:
- return ll_obd_statfs(inode, (void __user *)arg);
- case LL_IOC_LOV_GETSTRIPE:
- case LL_IOC_MDC_GETINFO:
- case IOC_MDC_GETFILEINFO:
- case IOC_MDC_GETFILESTRIPE: {
- struct ptlrpc_request *request = NULL;
- struct lov_user_md __user *lump;
- struct lov_mds_md *lmm = NULL;
- struct mdt_body *body;
- char *filename = NULL;
- int lmmsize;
-
- if (cmd == IOC_MDC_GETFILEINFO ||
- cmd == IOC_MDC_GETFILESTRIPE) {
- filename = ll_getname((const char __user *)arg);
- if (IS_ERR(filename))
- return PTR_ERR(filename);
-
- rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
- &lmmsize, &request);
- } else {
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
- &request, 0);
- }
-
- if (request) {
- body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
- LASSERT(body);
- } else {
- goto out_req;
- }
-
- if (rc < 0) {
- if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
- cmd == LL_IOC_MDC_GETINFO)) {
- rc = 0;
- goto skip_lmm;
- }
-
- goto out_req;
- }
-
- if (cmd == IOC_MDC_GETFILESTRIPE ||
- cmd == LL_IOC_LOV_GETSTRIPE) {
- lump = (struct lov_user_md __user *)arg;
- } else {
- struct lov_user_mds_data __user *lmdp;
-
- lmdp = (struct lov_user_mds_data __user *)arg;
- lump = &lmdp->lmd_lmm;
- }
- if (copy_to_user(lump, lmm, lmmsize)) {
- if (copy_to_user(lump, lmm, sizeof(*lump))) {
- rc = -EFAULT;
- goto out_req;
- }
- rc = -EOVERFLOW;
- }
-skip_lmm:
- if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
- struct lov_user_mds_data __user *lmdp;
- lstat_t st = { 0 };
-
- st.st_dev = inode->i_sb->s_dev;
- st.st_mode = body->mbo_mode;
- st.st_nlink = body->mbo_nlink;
- st.st_uid = body->mbo_uid;
- st.st_gid = body->mbo_gid;
- st.st_rdev = body->mbo_rdev;
- st.st_size = body->mbo_size;
- st.st_blksize = PAGE_SIZE;
- st.st_blocks = body->mbo_blocks;
- st.st_atime = body->mbo_atime;
- st.st_mtime = body->mbo_mtime;
- st.st_ctime = body->mbo_ctime;
- st.st_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags &
- LL_SBI_32BIT_API);
-
- lmdp = (struct lov_user_mds_data __user *)arg;
- if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) {
- rc = -EFAULT;
- goto out_req;
- }
- }
-
-out_req:
- ptlrpc_req_finished(request);
- if (filename)
- ll_putname(filename);
- return rc;
- }
- case OBD_IOC_QUOTACTL: {
- struct if_quotactl *qctl;
-
- qctl = kzalloc(sizeof(*qctl), GFP_NOFS);
- if (!qctl)
- return -ENOMEM;
-
- if (copy_from_user(qctl, (void __user *)arg, sizeof(*qctl))) {
- rc = -EFAULT;
- goto out_quotactl;
- }
-
- rc = quotactl_ioctl(sbi, qctl);
-
- if (rc == 0 && copy_to_user((void __user *)arg, qctl,
- sizeof(*qctl)))
- rc = -EFAULT;
-
-out_quotactl:
- kfree(qctl);
- return rc;
- }
- case OBD_IOC_GETDTNAME:
- case OBD_IOC_GETMDNAME:
- return ll_get_obd_name(inode, cmd, arg);
- case LL_IOC_FLUSHCTX:
- return ll_flush_ctx(inode);
- case LL_IOC_GETOBDCOUNT: {
- int count, vallen;
- struct obd_export *exp;
-
- if (copy_from_user(&count, (int __user *)arg, sizeof(int)))
- return -EFAULT;
-
- /* get ost count when count is zero, get mdt count otherwise */
- exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
- vallen = sizeof(count);
- rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
- KEY_TGT_COUNT, &vallen, &count);
- if (rc) {
- CERROR("get target count failed: %d\n", rc);
- return rc;
- }
-
- if (copy_to_user((int __user *)arg, &count, sizeof(int)))
- return -EFAULT;
-
- return 0;
- }
- case LL_IOC_PATH2FID:
- if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
- sizeof(struct lu_fid)))
- return -EFAULT;
- return 0;
- case LL_IOC_GET_CONNECT_FLAGS: {
- return obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL,
- (void __user *)arg);
- }
- case OBD_IOC_CHANGELOG_SEND:
- case OBD_IOC_CHANGELOG_CLEAR:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
- sizeof(struct ioc_changelog));
- return rc;
- case OBD_IOC_FID2PATH:
- return ll_fid2path(inode, (void __user *)arg);
- case LL_IOC_GETPARENT:
- return ll_getparent(file, (void __user *)arg);
- case LL_IOC_FID2MDTIDX: {
- struct obd_export *exp = ll_i2mdexp(inode);
- struct lu_fid fid;
- __u32 index;
-
- if (copy_from_user(&fid, (const struct lu_fid __user *)arg,
- sizeof(fid)))
- return -EFAULT;
-
- /* Call mdc_iocontrol */
- rc = obd_iocontrol(LL_IOC_FID2MDTIDX, exp, sizeof(fid), &fid,
- &index);
- if (rc)
- return rc;
-
- return index;
- }
- case LL_IOC_HSM_REQUEST: {
- struct hsm_user_request *hur;
- ssize_t totalsize;
-
- hur = memdup_user((void __user *)arg, sizeof(*hur));
- if (IS_ERR(hur))
- return PTR_ERR(hur);
-
- /* Compute the whole struct size */
- totalsize = hur_len(hur);
- kfree(hur);
- if (totalsize < 0)
- return -E2BIG;
-
- /* Final size will be more than double totalsize */
- if (totalsize >= MDS_MAXREQSIZE / 3)
- return -E2BIG;
-
- hur = kzalloc(totalsize, GFP_NOFS);
- if (!hur)
- return -ENOMEM;
-
- /* Copy the whole struct */
- if (copy_from_user(hur, (void __user *)arg, totalsize)) {
- kvfree(hur);
- return -EFAULT;
- }
-
- if (hur->hur_request.hr_action == HUA_RELEASE) {
- const struct lu_fid *fid;
- struct inode *f;
- int i;
-
- for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
- fid = &hur->hur_user_item[i].hui_fid;
- f = search_inode_for_lustre(inode->i_sb, fid);
- if (IS_ERR(f)) {
- rc = PTR_ERR(f);
- break;
- }
-
- rc = ll_hsm_release(f);
- iput(f);
- if (rc != 0)
- break;
- }
- } else {
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
- hur, NULL);
- }
-
- kvfree(hur);
-
- return rc;
- }
- case LL_IOC_HSM_PROGRESS: {
- struct hsm_progress_kernel hpk;
- struct hsm_progress hp;
-
- if (copy_from_user(&hp, (void __user *)arg, sizeof(hp)))
- return -EFAULT;
-
- hpk.hpk_fid = hp.hp_fid;
- hpk.hpk_cookie = hp.hp_cookie;
- hpk.hpk_extent = hp.hp_extent;
- hpk.hpk_flags = hp.hp_flags;
- hpk.hpk_errval = hp.hp_errval;
- hpk.hpk_data_version = 0;
-
- /* File may not exist in Lustre; all progress
- * reported to Lustre root
- */
- rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
- NULL);
- return rc;
- }
- case LL_IOC_HSM_CT_START:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
- sizeof(struct lustre_kernelcomm));
- return rc;
-
- case LL_IOC_HSM_COPY_START: {
- struct hsm_copy *copy;
- int rc;
-
- copy = memdup_user((char __user *)arg, sizeof(*copy));
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = ll_ioc_copy_start(inode->i_sb, copy);
- if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
- rc = -EFAULT;
-
- kfree(copy);
- return rc;
- }
- case LL_IOC_HSM_COPY_END: {
- struct hsm_copy *copy;
- int rc;
-
- copy = memdup_user((char __user *)arg, sizeof(*copy));
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = ll_ioc_copy_end(inode->i_sb, copy);
- if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
- rc = -EFAULT;
-
- kfree(copy);
- return rc;
- }
- case LL_IOC_MIGRATE: {
- char *buf = NULL;
- const char *filename;
- int namelen = 0;
- int len;
- int rc;
- int mdtidx;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc < 0)
- return rc;
-
- data = (struct obd_ioctl_data *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- !data->ioc_inllen1 || !data->ioc_inllen2) {
- rc = -EINVAL;
- goto migrate_free;
- }
-
- filename = data->ioc_inlbuf1;
- namelen = data->ioc_inllen1;
- if (namelen < 1 || namelen != strlen(filename) + 1) {
- rc = -EINVAL;
- goto migrate_free;
- }
-
- if (data->ioc_inllen2 != sizeof(mdtidx)) {
- rc = -EINVAL;
- goto migrate_free;
- }
- mdtidx = *(int *)data->ioc_inlbuf2;
-
- rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1);
-migrate_free:
- kvfree(buf);
-
- return rc;
- }
-
- default:
- return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
- (void __user *)arg);
- }
-}
-
-static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file->f_mapping->host;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int api32 = ll_need_32bit_api(sbi);
- loff_t ret = -EINVAL;
-
- switch (origin) {
- case SEEK_SET:
- break;
- case SEEK_CUR:
- offset += file->f_pos;
- break;
- case SEEK_END:
- if (offset > 0)
- goto out;
- if (api32)
- offset += LL_DIR_END_OFF_32BIT;
- else
- offset += LL_DIR_END_OFF;
- break;
- default:
- goto out;
- }
-
- if (offset >= 0 &&
- ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
- (!api32 && offset <= LL_DIR_END_OFF))) {
- if (offset != file->f_pos) {
- if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
- (!api32 && offset == LL_DIR_END_OFF))
- fd->lfd_pos = MDS_DIR_END_OFF;
- else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
- fd->lfd_pos = offset << 32;
- else
- fd->lfd_pos = offset;
- file->f_pos = offset;
- }
- ret = offset;
- }
- goto out;
-
-out:
- return ret;
-}
-
-static int ll_dir_open(struct inode *inode, struct file *file)
-{
- return ll_file_open(inode, file);
-}
-
-static int ll_dir_release(struct inode *inode, struct file *file)
-{
- return ll_file_release(inode, file);
-}
-
-const struct file_operations ll_dir_operations = {
- .llseek = ll_dir_seek,
- .open = ll_dir_open,
- .release = ll_dir_release,
- .read = generic_read_dir,
- .iterate_shared = ll_readdir,
- .unlocked_ioctl = ll_dir_ioctl,
- .fsync = ll_fsync,
-};
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
deleted file mode 100644
index ca5faea13b7e..000000000000
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ /dev/null
@@ -1,3600 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/file.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <lustre_dlm.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-#include <linux/sched.h>
-#include <linux/mount.h>
-#include <uapi/linux/lustre/lustre_fiemap.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_swab.h>
-
-#include <cl_object.h>
-#include "llite_internal.h"
-
-static int
-ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
-
-static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg, int *rcp);
-
-static struct ll_file_data *ll_file_data_get(void)
-{
- struct ll_file_data *fd;
-
- fd = kmem_cache_zalloc(ll_file_data_slab, GFP_NOFS);
- if (!fd)
- return NULL;
- fd->fd_write_failed = false;
- return fd;
-}
-
-static void ll_file_data_put(struct ll_file_data *fd)
-{
- if (fd)
- kmem_cache_free(ll_file_data_slab, fd);
-}
-
-/**
- * Packs all the attributes into @op_data for the CLOSE rpc.
- */
-static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
- struct obd_client_handle *och)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- ll_prep_md_op_data(op_data, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
-
- op_data->op_attr.ia_mode = inode->i_mode;
- op_data->op_attr.ia_atime = inode->i_atime;
- op_data->op_attr.ia_mtime = inode->i_mtime;
- op_data->op_attr.ia_ctime = inode->i_ctime;
- op_data->op_attr.ia_size = i_size_read(inode);
- op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_CTIME | ATTR_CTIME_SET;
- op_data->op_attr_blocks = inode->i_blocks;
- op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
- op_data->op_handle = och->och_fh;
-
- /*
- * For HSM: if inode data has been modified, pack it so that
- * MDT can set data dirty flag in the archive.
- */
- if (och->och_flags & FMODE_WRITE &&
- test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags))
- op_data->op_bias |= MDS_DATA_MODIFIED;
-}
-
-/**
- * Perform a close, possibly with a bias.
- * The meaning of "data" depends on the value of "bias".
- *
- * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
- * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
- * swap layouts with.
- */
-static int ll_close_inode_openhandle(struct inode *inode,
- struct obd_client_handle *och,
- enum mds_op_bias bias,
- void *data)
-{
- const struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_export *md_exp = ll_i2mdexp(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int rc;
-
- if (!class_exp2obd(md_exp)) {
- CERROR("%s: invalid MDC connection handle closing " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid));
- rc = 0;
- goto out;
- }
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- /*
- * We leak openhandle and request here on error, but not much to be
- * done in OOM case since app won't retry close on error either.
- */
- if (!op_data) {
- rc = -ENOMEM;
- goto out;
- }
-
- ll_prepare_close(inode, op_data, och);
- switch (bias) {
- case MDS_CLOSE_LAYOUT_SWAP:
- LASSERT(data);
- op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
- op_data->op_data_version = 0;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_fid2 = *ll_inode2fid(data);
- break;
-
- case MDS_HSM_RELEASE:
- LASSERT(data);
- op_data->op_bias |= MDS_HSM_RELEASE;
- op_data->op_data_version = *(__u64 *)data;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
- break;
-
- default:
- LASSERT(!data);
- break;
- }
-
- rc = md_close(md_exp, op_data, och->och_mod, &req);
- if (rc && rc != -EINTR) {
- CERROR("%s: inode " DFID " mdc close failed: rc = %d\n",
- md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
- }
-
- if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) &&
- !rc) {
- struct mdt_body *body;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
- rc = -EBUSY;
- }
-
- ll_finish_md_op_data(op_data);
-
-out:
- md_clear_open_replay_data(md_exp, och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
-
- ptlrpc_req_finished(req);
- return rc;
-}
-
-int ll_md_real_close(struct inode *inode, fmode_t fmode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle **och_p;
- struct obd_client_handle *och;
- __u64 *och_usecount;
- int rc = 0;
-
- if (fmode & FMODE_WRITE) {
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (fmode & FMODE_EXEC) {
- och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
- LASSERT(fmode & FMODE_READ);
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
-
- mutex_lock(&lli->lli_och_mutex);
- if (*och_usecount > 0) {
- /* There are still users of this handle, so skip
- * freeing it.
- */
- mutex_unlock(&lli->lli_och_mutex);
- return 0;
- }
-
- och = *och_p;
- *och_p = NULL;
- mutex_unlock(&lli->lli_och_mutex);
-
- if (och) {
- /* There might be a race and this handle may already
- * be closed.
- */
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
- }
-
- return rc;
-}
-
-static int ll_md_close(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- int lockmode;
- __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
- struct lustre_handle lockh;
- union ldlm_policy_data policy = {
- .l_inodebits = { MDS_INODELOCK_OPEN }
- };
- int rc = 0;
-
- /* clear group lock, if present */
- if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
- ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
-
- if (fd->fd_lease_och) {
- bool lease_broken;
-
- /* Usually the lease is not released when the
- * application crashed, we need to release here.
- */
- rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
- CDEBUG(rc ? D_ERROR : D_INODE,
- "Clean up lease " DFID " %d/%d\n",
- PFID(&lli->lli_fid), rc, lease_broken);
-
- fd->fd_lease_och = NULL;
- }
-
- if (fd->fd_och) {
- rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
- fd->fd_och = NULL;
- goto out;
- }
-
- /* Let's see if we have good enough OPEN lock on the file and if
- * we can skip talking to MDS
- */
-
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_omode & FMODE_WRITE) {
- lockmode = LCK_CW;
- LASSERT(lli->lli_open_fd_write_count);
- lli->lli_open_fd_write_count--;
- } else if (fd->fd_omode & FMODE_EXEC) {
- lockmode = LCK_PR;
- LASSERT(lli->lli_open_fd_exec_count);
- lli->lli_open_fd_exec_count--;
- } else {
- lockmode = LCK_CR;
- LASSERT(lli->lli_open_fd_read_count);
- lli->lli_open_fd_read_count--;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
- if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
- LDLM_IBITS, &policy, lockmode, &lockh))
- rc = ll_md_real_close(inode, fd->fd_omode);
-
-out:
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
-
- return rc;
-}
-
-/* While this returns an error code, fput() the caller does not, so we need
- * to make every effort to clean up all of our state here. Also, applications
- * rarely check close errors and even if an error is returned they will not
- * re-try the close call.
- */
-int ll_file_release(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- if (!is_root_inode(inode))
- ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
- fd = LUSTRE_FPRIVATE(file);
- LASSERT(fd);
-
- /* The last ref on @file, maybe not be the owner pid of statahead,
- * because parent and child process can share the same file handle.
- */
- if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
- ll_deauthorize_statahead(inode, fd);
-
- if (is_root_inode(inode)) {
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
- return 0;
- }
-
- if (!S_ISDIR(inode->i_mode)) {
- if (lli->lli_clob)
- lov_read_and_clear_async_rc(lli->lli_clob);
- lli->lli_async_rc = 0;
- }
-
- rc = ll_md_close(inode, file);
-
- if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
- libcfs_debug_dumplog();
-
- return rc;
-}
-
-static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
- struct lookup_intent *itp)
-{
- struct inode *inode = d_inode(de);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct dentry *parent = de->d_parent;
- const char *name = NULL;
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int len = 0, rc;
-
- LASSERT(parent);
- LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
-
- /*
- * if server supports open-by-fid, or file name is invalid, don't pack
- * name in open request
- */
- if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
- lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
- name = de->d_name.name;
- len = de->d_name.len;
- }
-
- op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
- O_RDWR, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
- op_data->op_data = lmm;
- op_data->op_data_size = lmmsize;
-
- rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
- &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- if (rc == -ESTALE) {
- /* reason for keep own exit path - don`t flood log
- * with messages with -ESTALE errors.
- */
- if (!it_disposition(itp, DISP_OPEN_OPEN) ||
- it_open_error(DISP_OPEN_OPEN, itp))
- goto out;
- ll_release_openhandle(inode, itp);
- goto out;
- }
-
- if (it_disposition(itp, DISP_LOOKUP_NEG)) {
- rc = -ENOENT;
- goto out;
- }
-
- if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
- rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
- CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
- goto out;
- }
-
- rc = ll_prep_inode(&inode, req, NULL, itp);
- if (!rc && itp->it_lock_mode)
- ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);
-
-out:
- ptlrpc_req_finished(req);
- ll_intent_drop_lock(itp);
-
- /*
- * We did open by fid, but by the time we got to the server,
- * the object disappeared. If this is a create, we cannot really
- * tell the userspace that the file it was trying to create
- * does not exist. Instead let's return -ESTALE, and the VFS will
- * retry the create with LOOKUP_REVAL that we are going to catch
- * in ll_revalidate_dentry() and use lookup then.
- */
- if (rc == -ENOENT && itp->it_op & IT_CREAT)
- rc = -ESTALE;
-
- return rc;
-}
-
-static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
- struct obd_client_handle *och)
-{
- struct mdt_body *body;
-
- body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
- och->och_fh = body->mbo_handle;
- och->och_fid = body->mbo_fid1;
- och->och_lease_handle.cookie = it->it_lock_handle;
- och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
- och->och_flags = it->it_flags;
-
- return md_set_open_replay_data(md_exp, och, it);
-}
-
-static int ll_local_open(struct file *file, struct lookup_intent *it,
- struct ll_file_data *fd, struct obd_client_handle *och)
-{
- struct inode *inode = file_inode(file);
-
- LASSERT(!LUSTRE_FPRIVATE(file));
-
- LASSERT(fd);
-
- if (och) {
- int rc;
-
- rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
- if (rc != 0)
- return rc;
- }
-
- LUSTRE_FPRIVATE(file) = fd;
- ll_readahead_init(inode, &fd->fd_ras);
- fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
-
- /* ll_cl_context initialize */
- rwlock_init(&fd->fd_lock);
- INIT_LIST_HEAD(&fd->fd_lccs);
-
- return 0;
-}
-
-/* Open a file, and (for the very first open) create objects on the OSTs at
- * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
- * creation or open until ll_lov_setstripe() ioctl is called.
- *
- * If we already have the stripe MD locally then we don't request it in
- * md_open(), by passing a lmm_size = 0.
- *
- * It is up to the application to ensure no other processes open this file
- * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
- * used. We might be able to avoid races of that sort by getting lli_open_sem
- * before returning in the O_LOV_DELAY_CREATE case and dropping it here
- * or in ll_file_release(), but I'm not sure that is desirable/necessary.
- */
-int ll_file_open(struct inode *inode, struct file *file)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lookup_intent *it, oit = { .it_op = IT_OPEN,
- .it_flags = file->f_flags };
- struct obd_client_handle **och_p = NULL;
- __u64 *och_usecount = NULL;
- struct ll_file_data *fd;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), flags %o\n",
- PFID(ll_inode2fid(inode)), inode, file->f_flags);
-
- it = file->private_data; /* XXX: compat macro */
- file->private_data = NULL; /* prevent ll_local_open assertion */
-
- fd = ll_file_data_get();
- if (!fd) {
- rc = -ENOMEM;
- goto out_openerr;
- }
-
- fd->fd_file = file;
- if (S_ISDIR(inode->i_mode))
- ll_authorize_statahead(inode, fd);
-
- if (is_root_inode(inode)) {
- LUSTRE_FPRIVATE(file) = fd;
- return 0;
- }
-
- if (!it || !it->it_disposition) {
- /* Convert f_flags into access mode. We cannot use file->f_mode,
- * because everything but O_ACCMODE mask was stripped from
- * there
- */
- if ((oit.it_flags + 1) & O_ACCMODE)
- oit.it_flags++;
- if (file->f_flags & O_TRUNC)
- oit.it_flags |= FMODE_WRITE;
-
- /* kernel only call f_op->open in dentry_open. filp_open calls
- * dentry_open after call to open_namei that checks permissions.
- * Only nfsd_open call dentry_open directly without checking
- * permissions and because of that this code below is safe.
- */
- if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
- oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
-
- /* We do not want O_EXCL here, presumably we opened the file
- * already? XXX - NFS implications?
- */
- oit.it_flags &= ~O_EXCL;
-
- /* bug20584, if "it_flags" contains O_CREAT, the file will be
- * created if necessary, then "IT_CREAT" should be set to keep
- * consistent with it
- */
- if (oit.it_flags & O_CREAT)
- oit.it_op |= IT_CREAT;
-
- it = &oit;
- }
-
-restart:
- /* Let's see if we have file open on MDS already. */
- if (it->it_flags & FMODE_WRITE) {
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (it->it_flags & FMODE_EXEC) {
- och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
-
- mutex_lock(&lli->lli_och_mutex);
- if (*och_p) { /* Open handle is present */
- if (it_disposition(it, DISP_OPEN_OPEN)) {
- /* Well, there's extra open request that we do not need,
- * let's close it somehow. This will decref request.
- */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc) {
- mutex_unlock(&lli->lli_och_mutex);
- goto out_openerr;
- }
-
- ll_release_openhandle(inode, it);
- }
- (*och_usecount)++;
-
- rc = ll_local_open(file, it, fd, NULL);
- if (rc) {
- (*och_usecount)--;
- mutex_unlock(&lli->lli_och_mutex);
- goto out_openerr;
- }
- } else {
- LASSERT(*och_usecount == 0);
- if (!it->it_disposition) {
- /* We cannot just request lock handle now, new ELC code
- * means that one of other OPEN locks for this file
- * could be cancelled, and since blocking ast handler
- * would attempt to grab och_mutex as well, that would
- * result in a deadlock
- */
- mutex_unlock(&lli->lli_och_mutex);
- /*
- * Normally called under two situations:
- * 1. NFS export.
- * 2. revalidate with IT_OPEN (revalidate doesn't
- * execute this intent any more).
- *
- * Always fetch MDS_OPEN_LOCK if this is not setstripe.
- *
- * Always specify MDS_OPEN_BY_FID because we don't want
- * to get file with different fid.
- */
- it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
- rc = ll_intent_file_open(file->f_path.dentry,
- NULL, 0, it);
- if (rc)
- goto out_openerr;
-
- goto restart;
- }
- *och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
- if (!*och_p) {
- rc = -ENOMEM;
- goto out_och_free;
- }
-
- (*och_usecount)++;
-
- /* md_intent_lock() didn't get a request ref if there was an
- * open error, so don't do cleanup on the request here
- * (bug 3430)
- */
- /* XXX (green): Should not we bail out on any error here, not
- * just open error?
- */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc)
- goto out_och_free;
-
- LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
- "inode %p: disposition %x, status %d\n", inode,
- it_disposition(it, ~0), it->it_status);
-
- rc = ll_local_open(file, it, fd, *och_p);
- if (rc)
- goto out_och_free;
- }
- mutex_unlock(&lli->lli_och_mutex);
- fd = NULL;
-
- /* Must do this outside lli_och_mutex lock to prevent deadlock where
- * different kind of OPEN lock for this same inode gets cancelled
- * by ldlm_cancel_lru
- */
- if (!S_ISREG(inode->i_mode))
- goto out_och_free;
-
- cl_lov_delay_create_clear(&file->f_flags);
- goto out_och_free;
-
-out_och_free:
- if (rc) {
- if (och_p && *och_p) {
- kfree(*och_p);
- *och_p = NULL;
- (*och_usecount)--;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
-out_openerr:
- if (lli->lli_opendir_key == fd)
- ll_deauthorize_statahead(inode, fd);
- if (fd)
- ll_file_data_put(fd);
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
- }
-
- if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->it_request);
- it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- }
-
- return rc;
-}
-
-static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- int rc;
- struct lustre_handle lockh;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (rc < 0) {
- CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
- return rc;
- }
- break;
- case LDLM_CB_CANCELING:
- /* do nothing */
- break;
- }
- return 0;
-}
-
-/**
- * Acquire a lease and open the file.
- */
-static struct obd_client_handle *
-ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
- __u64 open_flags)
-{
- struct lookup_intent it = { .it_op = IT_OPEN };
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- struct lustre_handle old_handle = { 0 };
- struct obd_client_handle *och = NULL;
- int rc;
- int rc2;
-
- if (fmode != FMODE_WRITE && fmode != FMODE_READ)
- return ERR_PTR(-EINVAL);
-
- if (file) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct obd_client_handle **och_p;
- __u64 *och_usecount;
-
- if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
- return ERR_PTR(-EPERM);
-
- /* Get the openhandle of the file */
- rc = -EBUSY;
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- mutex_unlock(&lli->lli_och_mutex);
- return ERR_PTR(rc);
- }
-
- if (!fd->fd_och) {
- if (file->f_mode & FMODE_WRITE) {
- LASSERT(lli->lli_mds_write_och);
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else {
- LASSERT(lli->lli_mds_read_och);
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
- if (*och_usecount == 1) {
- fd->fd_och = *och_p;
- *och_p = NULL;
- *och_usecount = 0;
- rc = 0;
- }
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (rc < 0) /* more than 1 opener */
- return ERR_PTR(rc);
-
- LASSERT(fd->fd_och);
- old_handle = fd->fd_och->och_fh;
- }
-
- och = kzalloc(sizeof(*och), GFP_NOFS);
- if (!och)
- return ERR_PTR(-ENOMEM);
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- /* To tell the MDT this openhandle is from the same owner */
- op_data->op_handle = old_handle;
-
- it.it_flags = fmode | open_flags;
- it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
- rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
- &ll_md_blocking_lease_ast,
- /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
- * it can be cancelled which may mislead applications that the lease is
- * broken;
- * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
- * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
- * doesn't deal with openhandle, so normal openhandle will be leaked.
- */
- LDLM_FL_NO_LRU | LDLM_FL_EXCL);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc < 0)
- goto out_release_it;
-
- if (it_disposition(&it, DISP_LOOKUP_NEG)) {
- rc = -ENOENT;
- goto out_release_it;
- }
-
- rc = it_open_error(DISP_OPEN_OPEN, &it);
- if (rc)
- goto out_release_it;
-
- LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
- ll_och_fill(sbi->ll_md_exp, &it, och);
-
- if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
- rc = -EOPNOTSUPP;
- goto out_close;
- }
-
- /* already get lease, handle lease lock */
- ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- if (it.it_lock_mode == 0 ||
- it.it_lock_bits != MDS_INODELOCK_OPEN) {
- /* open lock must return for lease */
- CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
- PFID(ll_inode2fid(inode)), it.it_lock_mode,
- it.it_lock_bits);
- rc = -EPROTO;
- goto out_close;
- }
-
- ll_intent_release(&it);
- return och;
-
-out_close:
- /* Cancel open lock */
- if (it.it_lock_mode != 0) {
- ldlm_lock_decref_and_cancel(&och->och_lease_handle,
- it.it_lock_mode);
- it.it_lock_mode = 0;
- och->och_lease_handle.cookie = 0ULL;
- }
- rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
- if (rc2 < 0)
- CERROR("%s: error closing file " DFID ": %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&ll_i2info(inode)->lli_fid), rc2);
- och = NULL; /* och has been freed in ll_close_inode_openhandle() */
-out_release_it:
- ll_intent_release(&it);
-out:
- kfree(och);
- return ERR_PTR(rc);
-}
-
-/**
- * Check whether a layout swap can be done between two inodes.
- *
- * \param[in] inode1 First inode to check
- * \param[in] inode2 Second inode to check
- *
- * \retval 0 on success, layout swap can be performed between both inodes
- * \retval negative error code if requirements are not met
- */
-static int ll_check_swap_layouts_validity(struct inode *inode1,
- struct inode *inode2)
-{
- if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
- return -EINVAL;
-
- if (inode_permission(inode1, MAY_WRITE) ||
- inode_permission(inode2, MAY_WRITE))
- return -EPERM;
-
- if (inode1->i_sb != inode2->i_sb)
- return -EXDEV;
-
- return 0;
-}
-
-static int ll_swap_layouts_close(struct obd_client_handle *och,
- struct inode *inode, struct inode *inode2)
-{
- const struct lu_fid *fid1 = ll_inode2fid(inode);
- const struct lu_fid *fid2;
- int rc;
-
- CDEBUG(D_INODE, "%s: biased close of file " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
-
- rc = ll_check_swap_layouts_validity(inode, inode2);
- if (rc < 0)
- goto out_free_och;
-
- /* We now know that inode2 is a lustre inode */
- fid2 = ll_inode2fid(inode2);
-
- rc = lu_fid_cmp(fid1, fid2);
- if (!rc) {
- rc = -EINVAL;
- goto out_free_och;
- }
-
- /*
- * Close the file and swap layouts between inode & inode2.
- * NB: lease lock handle is released in mdc_close_layout_swap_pack()
- * because we still need it to pack l_remote_handle to MDT.
- */
- rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
- inode2);
-
- och = NULL; /* freed in ll_close_inode_openhandle() */
-
-out_free_och:
- kfree(och);
- return rc;
-}
-
-/**
- * Release lease and close the file.
- * It will check if the lease has ever broken.
- */
-static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken)
-{
- struct ldlm_lock *lock;
- bool cancelled = true;
-
- lock = ldlm_handle2lock(&och->och_lease_handle);
- if (lock) {
- lock_res_and_lock(lock);
- cancelled = ldlm_is_cancel(lock);
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
-
- CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
- PFID(&ll_i2info(inode)->lli_fid), cancelled);
-
- if (!cancelled)
- ldlm_cli_cancel(&och->och_lease_handle, 0);
- if (lease_broken)
- *lease_broken = cancelled;
-
- return ll_close_inode_openhandle(inode, och, 0, NULL);
-}
-
-int ll_merge_attr(const struct lu_env *env, struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct cl_attr *attr = vvp_env_thread_attr(env);
- s64 atime;
- s64 mtime;
- s64 ctime;
- int rc = 0;
-
- ll_inode_size_lock(inode);
-
- /* merge timestamps the most recently obtained from mds with
- * timestamps obtained from osts
- */
- LTIME_S(inode->i_atime) = lli->lli_atime;
- LTIME_S(inode->i_mtime) = lli->lli_mtime;
- LTIME_S(inode->i_ctime) = lli->lli_ctime;
-
- mtime = LTIME_S(inode->i_mtime);
- atime = LTIME_S(inode->i_atime);
- ctime = LTIME_S(inode->i_ctime);
-
- cl_object_attr_lock(obj);
- rc = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
-
- if (rc != 0)
- goto out_size_unlock;
-
- if (atime < attr->cat_atime)
- atime = attr->cat_atime;
-
- if (ctime < attr->cat_ctime)
- ctime = attr->cat_ctime;
-
- if (mtime < attr->cat_mtime)
- mtime = attr->cat_mtime;
-
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(&lli->lli_fid), attr->cat_size);
-
- i_size_write(inode, attr->cat_size);
-
- inode->i_blocks = attr->cat_blocks;
-
- LTIME_S(inode->i_mtime) = mtime;
- LTIME_S(inode->i_atime) = atime;
- LTIME_S(inode->i_ctime) = ctime;
-
-out_size_unlock:
- ll_inode_size_unlock(inode);
-
- return rc;
-}
-
-static bool file_is_noatime(const struct file *file)
-{
- const struct vfsmount *mnt = file->f_path.mnt;
- const struct inode *inode = file_inode(file);
-
- /* Adapted from file_accessed() and touch_atime().*/
- if (file->f_flags & O_NOATIME)
- return true;
-
- if (inode->i_flags & S_NOATIME)
- return true;
-
- if (IS_NOATIME(inode))
- return true;
-
- if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
- return true;
-
- if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- return true;
-
- if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
- return true;
-
- return false;
-}
-
-static void ll_io_init(struct cl_io *io, const struct file *file, int write)
-{
- struct inode *inode = file_inode(file);
-
- io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
- if (write) {
- io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
- io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
- file->f_flags & O_DIRECT ||
- IS_SYNC(inode);
- }
- io->ci_obj = ll_i2info(inode)->lli_clob;
- io->ci_lockreq = CILR_MAYBE;
- if (ll_file_nolock(file)) {
- io->ci_lockreq = CILR_NEVER;
- io->ci_no_srvlock = 1;
- } else if (file->f_flags & O_APPEND) {
- io->ci_lockreq = CILR_MANDATORY;
- }
-
- io->ci_noatime = file_is_noatime(file);
-}
-
-static ssize_t
-ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
- struct file *file, enum cl_io_type iot,
- loff_t *ppos, size_t count)
-{
- struct ll_inode_info *lli = ll_i2info(file_inode(file));
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct vvp_io *vio = vvp_env_io(env);
- struct range_lock range;
- struct cl_io *io;
- ssize_t result = 0;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
- file, iot, *ppos, count);
-
-restart:
- io = vvp_env_thread_io(env);
- ll_io_init(io, file, iot == CIT_WRITE);
-
- if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
- struct vvp_io *vio = vvp_env_io(env);
- bool range_locked = false;
-
- if (file->f_flags & O_APPEND)
- range_lock_init(&range, 0, LUSTRE_EOF);
- else
- range_lock_init(&range, *ppos, *ppos + count - 1);
-
- vio->vui_fd = LUSTRE_FPRIVATE(file);
- vio->vui_iter = args->u.normal.via_iter;
- vio->vui_iocb = args->u.normal.via_iocb;
- /*
- * Direct IO reads must also take range lock,
- * or multiple reads will try to work on the same pages
- * See LU-6227 for details.
- */
- if (((iot == CIT_WRITE) ||
- (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
- !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
- range.rl_node.in_extent.start,
- range.rl_node.in_extent.end);
- rc = range_lock(&lli->lli_write_tree, &range);
- if (rc < 0)
- goto out;
-
- range_locked = true;
- }
- ll_cl_add(file, env, io);
- rc = cl_io_loop(env, io);
- ll_cl_remove(file, env);
- if (range_locked) {
- CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
- range.rl_node.in_extent.start,
- range.rl_node.in_extent.end);
- range_unlock(&lli->lli_write_tree, &range);
- }
- } else {
- /* cl_io_rw_init() handled IO */
- rc = io->ci_result;
- }
-
- if (io->ci_nob > 0) {
- result = io->ci_nob;
- count -= io->ci_nob;
- *ppos = io->u.ci_wr.wr.crw_pos;
-
- /* prepare IO restart */
- if (count > 0)
- args->u.normal.via_iter = vio->vui_iter;
- }
-out:
- cl_io_fini(env, io);
-
- if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- "%s: restart %s from %lld, count:%zu, result: %zd\n",
- file_dentry(file)->d_name.name,
- iot == CIT_READ ? "read" : "write",
- *ppos, count, result);
- goto restart;
- }
-
- if (iot == CIT_READ) {
- if (result >= 0)
- ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
- LPROC_LL_READ_BYTES, result);
- } else if (iot == CIT_WRITE) {
- if (result >= 0) {
- ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
- LPROC_LL_WRITE_BYTES, result);
- fd->fd_write_failed = false;
- } else if (!result && !rc) {
- rc = io->ci_result;
- if (rc < 0)
- fd->fd_write_failed = true;
- else
- fd->fd_write_failed = false;
- } else if (rc != -ERESTARTSYS) {
- fd->fd_write_failed = true;
- }
- }
- CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
-
- return result > 0 ? result : rc;
-}
-
-static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- struct lu_env *env;
- struct vvp_io_args *args;
- ssize_t result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- args = ll_env_args(env);
- args->u.normal.via_iter = to;
- args->u.normal.via_iocb = iocb;
-
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
- &iocb->ki_pos, iov_iter_count(to));
- cl_env_put(env, &refcheck);
- return result;
-}
-
-/*
- * Write to a file (through the page cache).
- */
-static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct lu_env *env;
- struct vvp_io_args *args;
- ssize_t result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- args = ll_env_args(env);
- args->u.normal.via_iter = from;
- args->u.normal.via_iocb = iocb;
-
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
- &iocb->ki_pos, iov_iter_count(from));
- cl_env_put(env, &refcheck);
- return result;
-}
-
-int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- __u64 flags, struct lov_user_md *lum,
- int lum_size)
-{
- struct lookup_intent oit = {
- .it_op = IT_OPEN,
- .it_flags = flags | MDS_OPEN_BY_FID,
- };
- int rc = 0;
-
- ll_inode_size_lock(inode);
- rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
- if (rc < 0)
- goto out_unlock;
-
- ll_release_openhandle(inode, &oit);
-
-out_unlock:
- ll_inode_size_unlock(inode);
- ll_intent_release(&oit);
- return rc;
-}
-
-int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
- struct lov_mds_md **lmmp, int *lmm_size,
- struct ptlrpc_request **request)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdt_body *body;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *req = NULL;
- struct md_op_data *op_data;
- int rc, lmmsize;
-
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
- strlen(filename), lmmsize,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
- filename, rc);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- lmmsize = body->mbo_eadatasize;
-
- if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
- lmmsize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
-
- if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
- (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
- rc = -EPROTO;
- goto out;
- }
-
- /*
- * This is coming from the MDS, so is probably in
- * little endian. We convert it to host endian before
- * passing it to userspace.
- */
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
- int stripe_count;
-
- stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
- if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
- stripe_count = 0;
-
- /* if function called for directory - we should
- * avoid swab not existent lsm objects
- */
- if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- if (S_ISREG(body->mbo_mode))
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v1 *)lmm)->lmm_objects,
- stripe_count);
- } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- if (S_ISREG(body->mbo_mode))
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v3 *)lmm)->lmm_objects,
- stripe_count);
- }
- }
-
-out:
- *lmmp = lmm;
- *lmm_size = lmmsize;
- *request = req;
- return rc;
-}
-
-static int ll_lov_setea(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
- struct lov_user_md *lump;
- int lum_size = sizeof(struct lov_user_md) +
- sizeof(struct lov_user_ost_data);
- int rc;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- lump = kzalloc(lum_size, GFP_NOFS);
- if (!lump)
- return -ENOMEM;
-
- if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
- kvfree(lump);
- return -EFAULT;
- }
-
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump,
- lum_size);
- cl_lov_delay_create_clear(&file->f_flags);
-
- kvfree(lump);
- return rc;
-}
-
-static int ll_file_getstripe(struct inode *inode,
- struct lov_user_md __user *lum)
-{
- struct lu_env *env;
- u16 refcheck;
- int rc;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-static int ll_lov_setstripe(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
- struct lov_user_md *klum;
- int lum_size, rc;
- __u64 flags = FMODE_WRITE;
-
- rc = ll_copy_user_md(lum, &klum);
- if (rc < 0)
- return rc;
-
- lum_size = rc;
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum,
- lum_size);
- cl_lov_delay_create_clear(&file->f_flags);
- if (rc == 0) {
- __u32 gen;
-
- put_user(0, &lum->lmm_stripe_count);
-
- ll_layout_refresh(inode, &gen);
- rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
- }
-
- kfree(klum);
- return rc;
-}
-
-static int
-ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_grouplock grouplock;
- int rc;
-
- if (arg == 0) {
- CWARN("group id for group lock must not be 0\n");
- return -EINVAL;
- }
-
- if (ll_file_nolock(file))
- return -EOPNOTSUPP;
-
- spin_lock(&lli->lli_lock);
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- CWARN("group lock already existed with gid %lu\n",
- fd->fd_grouplock.lg_gid);
- spin_unlock(&lli->lli_lock);
- return -EINVAL;
- }
- LASSERT(!fd->fd_grouplock.lg_lock);
- spin_unlock(&lli->lli_lock);
-
- rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
- arg, (file->f_flags & O_NONBLOCK), &grouplock);
- if (rc)
- return rc;
-
- spin_lock(&lli->lli_lock);
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- spin_unlock(&lli->lli_lock);
- CERROR("another thread just won the race\n");
- cl_put_grouplock(&grouplock);
- return -EINVAL;
- }
-
- fd->fd_flags |= LL_FILE_GROUP_LOCKED;
- fd->fd_grouplock = grouplock;
- spin_unlock(&lli->lli_lock);
-
- CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
- return 0;
-}
-
-static int ll_put_grouplock(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_grouplock grouplock;
-
- spin_lock(&lli->lli_lock);
- if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- spin_unlock(&lli->lli_lock);
- CWARN("no group lock held\n");
- return -EINVAL;
- }
- LASSERT(fd->fd_grouplock.lg_lock);
-
- if (fd->fd_grouplock.lg_gid != arg) {
- CWARN("group lock %lu doesn't match current id %lu\n",
- arg, fd->fd_grouplock.lg_gid);
- spin_unlock(&lli->lli_lock);
- return -EINVAL;
- }
-
- grouplock = fd->fd_grouplock;
- memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
- fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
- spin_unlock(&lli->lli_lock);
-
- cl_put_grouplock(&grouplock);
- CDEBUG(D_INFO, "group lock %lu released\n", arg);
- return 0;
-}
-
-/**
- * Close inode open handle
- *
- * \param inode [in] inode in question
- * \param it [in,out] intent which contains open info and result
- *
- * \retval 0 success
- * \retval <0 failure
- */
-int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
-{
- struct obd_client_handle *och;
- int rc;
-
- LASSERT(inode);
-
- /* Root ? Do nothing. */
- if (is_root_inode(inode))
- return 0;
-
- /* No open handle to close? Move away */
- if (!it_disposition(it, DISP_OPEN_OPEN))
- return 0;
-
- LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
-
- och = kzalloc(sizeof(*och), GFP_NOFS);
- if (!och) {
- rc = -ENOMEM;
- goto out;
- }
-
- ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
-
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
-out:
- /* this one is in place of ll_file_open */
- if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->it_request);
- it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- }
- return rc;
-}
-
-/**
- * Get size for inode for which FIEMAP mapping is requested.
- * Make the FIEMAP get_info call and returns the result.
- *
- * \param fiemap kernel buffer to hold extens
- * \param num_bytes kernel buffer size
- */
-static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
- size_t num_bytes)
-{
- struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
- struct lu_env *env;
- u16 refcheck;
- int rc = 0;
-
- /* Checks for fiemap flags */
- if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
- fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
- return -EBADR;
- }
-
- /* Check for FIEMAP_FLAG_SYNC */
- if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
- rc = filemap_fdatawrite(inode->i_mapping);
- if (rc)
- return rc;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- if (i_size_read(inode) == 0) {
- rc = ll_glimpse_size(inode);
- if (rc)
- goto out;
- }
-
- fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
- obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
- obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
-
- /* If filesize is 0, then there would be no objects for mapping */
- if (fmkey.lfik_oa.o_size == 0) {
- fiemap->fm_mapped_extents = 0;
- rc = 0;
- goto out;
- }
-
- memcpy(&fmkey.lfik_fiemap, fiemap, sizeof(*fiemap));
-
- rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
- &fmkey, fiemap, &num_bytes);
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-int ll_fid2path(struct inode *inode, void __user *arg)
-{
- struct obd_export *exp = ll_i2mdexp(inode);
- const struct getinfo_fid2path __user *gfin = arg;
- struct getinfo_fid2path *gfout;
- u32 pathlen;
- size_t outsize;
- int rc;
-
- if (!capable(CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
- return -EPERM;
-
- /* Only need to get the buflen */
- if (get_user(pathlen, &gfin->gf_pathlen))
- return -EFAULT;
-
- if (pathlen > PATH_MAX)
- return -EINVAL;
-
- outsize = sizeof(*gfout) + pathlen;
-
- gfout = kzalloc(outsize, GFP_NOFS);
- if (!gfout)
- return -ENOMEM;
-
- if (copy_from_user(gfout, arg, sizeof(*gfout))) {
- rc = -EFAULT;
- goto gf_free;
- }
-
- /* Call mdc_iocontrol */
- rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
- if (rc != 0)
- goto gf_free;
-
- if (copy_to_user(arg, gfout, outsize))
- rc = -EFAULT;
-
-gf_free:
- kfree(gfout);
- return rc;
-}
-
-/*
- * Read the data_version for inode.
- *
- * This value is computed using stripe object version on OST.
- * Version is computed using server side locking.
- *
- * @param flags if do sync on the OST side;
- * 0: no sync
- * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
- * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
- */
-int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
-{
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- struct lu_env *env;
- struct cl_io *io;
- u16 refcheck;
- int result;
-
- /* If no file object initialized, we consider its version is 0. */
- if (!obj) {
- *data_version = 0;
- return 0;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
- io->u.ci_data_version.dv_data_version = 0;
- io->u.ci_data_version.dv_flags = flags;
-
-restart:
- if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj))
- result = cl_io_loop(env, io);
- else
- result = io->ci_result;
-
- *data_version = io->u.ci_data_version.dv_data_version;
-
- cl_io_fini(env, io);
-
- if (unlikely(io->ci_need_restart))
- goto restart;
-
- cl_env_put(env, &refcheck);
-
- return result;
-}
-
-/*
- * Trigger a HSM release request for the provided inode.
- */
-int ll_hsm_release(struct inode *inode)
-{
- struct lu_env *env;
- struct obd_client_handle *och = NULL;
- __u64 data_version = 0;
- int rc;
- u16 refcheck;
-
- CDEBUG(D_INODE, "%s: Releasing file " DFID ".\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&ll_i2info(inode)->lli_fid));
-
- och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
- if (IS_ERR(och)) {
- rc = PTR_ERR(och);
- goto out;
- }
-
- /* Grab latest data_version and [am]time values */
- rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
- if (rc != 0)
- goto out;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- rc = PTR_ERR(env);
- goto out;
- }
-
- ll_merge_attr(env, inode);
- cl_env_put(env, &refcheck);
-
- /* Release the file.
- * NB: lease lock handle is released in mdc_hsm_release_pack() because
- * we still need it to pack l_remote_handle to MDT.
- */
- rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
- &data_version);
- och = NULL;
-
-out:
- if (och && !IS_ERR(och)) /* close the file */
- ll_lease_close(och, inode, NULL);
-
- return rc;
-}
-
-struct ll_swap_stack {
- u64 dv1;
- u64 dv2;
- struct inode *inode1;
- struct inode *inode2;
- bool check_dv1;
- bool check_dv2;
-};
-
-static int ll_swap_layouts(struct file *file1, struct file *file2,
- struct lustre_swap_layouts *lsl)
-{
- struct mdc_swap_layouts msl;
- struct md_op_data *op_data;
- __u32 gid;
- __u64 dv;
- struct ll_swap_stack *llss = NULL;
- int rc;
-
- llss = kzalloc(sizeof(*llss), GFP_NOFS);
- if (!llss)
- return -ENOMEM;
-
- llss->inode1 = file_inode(file1);
- llss->inode2 = file_inode(file2);
-
- rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
- if (rc < 0)
- goto free;
-
- /* we use 2 bool because it is easier to swap than 2 bits */
- if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
- llss->check_dv1 = true;
-
- if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
- llss->check_dv2 = true;
-
- /* we cannot use lsl->sl_dvX directly because we may swap them */
- llss->dv1 = lsl->sl_dv1;
- llss->dv2 = lsl->sl_dv2;
-
- rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
- if (!rc) /* same file, done! */
- goto free;
-
- if (rc < 0) { /* sequentialize it */
- swap(llss->inode1, llss->inode2);
- swap(file1, file2);
- swap(llss->dv1, llss->dv2);
- swap(llss->check_dv1, llss->check_dv2);
- }
-
- gid = lsl->sl_gid;
- if (gid != 0) { /* application asks to flush dirty cache */
- rc = ll_get_grouplock(llss->inode1, file1, gid);
- if (rc < 0)
- goto free;
-
- rc = ll_get_grouplock(llss->inode2, file2, gid);
- if (rc < 0) {
- ll_put_grouplock(llss->inode1, file1, gid);
- goto free;
- }
- }
-
- /* ultimate check, before swapping the layouts we check if
- * dataversion has changed (if requested)
- */
- if (llss->check_dv1) {
- rc = ll_data_version(llss->inode1, &dv, 0);
- if (rc)
- goto putgl;
- if (dv != llss->dv1) {
- rc = -EAGAIN;
- goto putgl;
- }
- }
-
- if (llss->check_dv2) {
- rc = ll_data_version(llss->inode2, &dv, 0);
- if (rc)
- goto putgl;
- if (dv != llss->dv2) {
- rc = -EAGAIN;
- goto putgl;
- }
- }
-
- /* struct md_op_data is used to send the swap args to the mdt
- * only flags is missing, so we use struct mdc_swap_layouts
- * through the md_op_data->op_data
- */
- /* flags from user space have to be converted before they are send to
- * server, no flag is sent today, they are only used on the client
- */
- msl.msl_flags = 0;
- rc = -ENOMEM;
- op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
- 0, LUSTRE_OPC_ANY, &msl);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto free;
- }
-
- rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
- sizeof(*op_data), op_data, NULL);
- ll_finish_md_op_data(op_data);
-
-putgl:
- if (gid != 0) {
- ll_put_grouplock(llss->inode2, file2, gid);
- ll_put_grouplock(llss->inode1, file1, gid);
- }
-
-free:
- kfree(llss);
-
- return rc;
-}
-
-int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
-{
- struct md_op_data *op_data;
- int rc;
-
- /* Detect out-of range masks */
- if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
- return -EINVAL;
-
- /* Non-root users are forbidden to set or clear flags which are
- * NOT defined in HSM_USER_MASK.
- */
- if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- /* Detect out-of range archive id */
- if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
- (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
- return -EINVAL;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hss);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
- sizeof(*op_data), op_data, NULL);
-
- ll_finish_md_op_data(op_data);
-
- return rc;
-}
-
-static int ll_hsm_import(struct inode *inode, struct file *file,
- struct hsm_user_import *hui)
-{
- struct hsm_state_set *hss = NULL;
- struct iattr *attr = NULL;
- int rc;
-
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
-
- /* set HSM flags */
- hss = kzalloc(sizeof(*hss), GFP_NOFS);
- if (!hss)
- return -ENOMEM;
-
- hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
- hss->hss_archive_id = hui->hui_archive_id;
- hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
- rc = ll_hsm_state_set(inode, hss);
- if (rc != 0)
- goto free_hss;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr) {
- rc = -ENOMEM;
- goto free_hss;
- }
-
- attr->ia_mode = hui->hui_mode & 0777;
- attr->ia_mode |= S_IFREG;
- attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
- attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
- attr->ia_size = hui->hui_size;
- attr->ia_mtime.tv_sec = hui->hui_mtime;
- attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
- attr->ia_atime.tv_sec = hui->hui_atime;
- attr->ia_atime.tv_nsec = hui->hui_atime_ns;
-
- attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
- ATTR_UID | ATTR_GID |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_ATIME | ATTR_ATIME_SET;
-
- inode_lock(inode);
-
- rc = ll_setattr_raw(file->f_path.dentry, attr, true);
- if (rc == -ENODATA)
- rc = 0;
-
- inode_unlock(inode);
-
- kfree(attr);
-free_hss:
- kfree(hss);
- return rc;
-}
-
-static inline long ll_lease_type_from_fmode(fmode_t fmode)
-{
- return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
- ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
-}
-
-static long
-ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = file_inode(file);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int flags, rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),cmd=%x\n",
- PFID(ll_inode2fid(inode)), inode, cmd);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
-
- /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
- if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
- return -ENOTTY;
-
- switch (cmd) {
- case LL_IOC_GETFLAGS:
- /* Get the current value of the file flags */
- return put_user(fd->fd_flags, (int __user *)arg);
- case LL_IOC_SETFLAGS:
- case LL_IOC_CLRFLAGS:
- /* Set or clear specific file flags */
- /* XXX This probably needs checks to ensure the flags are
- * not abused, and to handle any flag side effects.
- */
- if (get_user(flags, (int __user *)arg))
- return -EFAULT;
-
- if (cmd == LL_IOC_SETFLAGS) {
- if ((flags & LL_FILE_IGNORE_LOCK) &&
- !(file->f_flags & O_DIRECT)) {
- CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
- current->comm);
- return -EINVAL;
- }
-
- fd->fd_flags |= flags;
- } else {
- fd->fd_flags &= ~flags;
- }
- return 0;
- case LL_IOC_LOV_SETSTRIPE:
- return ll_lov_setstripe(inode, file, arg);
- case LL_IOC_LOV_SETEA:
- return ll_lov_setea(inode, file, arg);
- case LL_IOC_LOV_SWAP_LAYOUTS: {
- struct file *file2;
- struct lustre_swap_layouts lsl;
-
- if (copy_from_user(&lsl, (char __user *)arg,
- sizeof(struct lustre_swap_layouts)))
- return -EFAULT;
-
- if ((file->f_flags & O_ACCMODE) == O_RDONLY)
- return -EPERM;
-
- file2 = fget(lsl.sl_fd);
- if (!file2)
- return -EBADF;
-
- /* O_WRONLY or O_RDWR */
- if ((file2->f_flags & O_ACCMODE) == O_RDONLY) {
- rc = -EPERM;
- goto out;
- }
-
- if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
- struct obd_client_handle *och = NULL;
- struct ll_inode_info *lli;
- struct inode *inode2;
-
- if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) {
- rc = -EINVAL;
- goto out;
- }
-
- lli = ll_i2info(inode);
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (!och) {
- rc = -ENOLCK;
- goto out;
- }
- inode2 = file_inode(file2);
- rc = ll_swap_layouts_close(och, inode, inode2);
- } else {
- rc = ll_swap_layouts(file, file2, &lsl);
- }
-out:
- fput(file2);
- return rc;
- }
- case LL_IOC_LOV_GETSTRIPE:
- return ll_file_getstripe(inode,
- (struct lov_user_md __user *)arg);
- case FSFILT_IOC_GETFLAGS:
- case FSFILT_IOC_SETFLAGS:
- return ll_iocontrol(inode, file, cmd, arg);
- case FSFILT_IOC_GETVERSION_OLD:
- case FSFILT_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
- case LL_IOC_GROUP_LOCK:
- return ll_get_grouplock(inode, file, arg);
- case LL_IOC_GROUP_UNLOCK:
- return ll_put_grouplock(inode, file, arg);
- case IOC_OBD_STATFS:
- return ll_obd_statfs(inode, (void __user *)arg);
-
- /* We need to special case any other ioctls we want to handle,
- * to send them to the MDS/OST as appropriate and to properly
- * network encode the arg field.
- case FSFILT_IOC_SETVERSION_OLD:
- case FSFILT_IOC_SETVERSION:
- */
- case LL_IOC_FLUSHCTX:
- return ll_flush_ctx(inode);
- case LL_IOC_PATH2FID: {
- if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
- sizeof(struct lu_fid)))
- return -EFAULT;
-
- return 0;
- }
- case LL_IOC_GETPARENT:
- return ll_getparent(file, (struct getparent __user *)arg);
- case OBD_IOC_FID2PATH:
- return ll_fid2path(inode, (void __user *)arg);
- case LL_IOC_DATA_VERSION: {
- struct ioc_data_version idv;
- int rc;
-
- if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
- return -EFAULT;
-
- idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
- rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
- if (rc == 0 && copy_to_user((char __user *)arg, &idv,
- sizeof(idv)))
- return -EFAULT;
-
- return rc;
- }
-
- case LL_IOC_GET_MDTIDX: {
- int mdtidx;
-
- mdtidx = ll_get_mdt_idx(inode);
- if (mdtidx < 0)
- return mdtidx;
-
- if (put_user(mdtidx, (int __user *)arg))
- return -EFAULT;
-
- return 0;
- }
- case OBD_IOC_GETDTNAME:
- case OBD_IOC_GETMDNAME:
- return ll_get_obd_name(inode, cmd, arg);
- case LL_IOC_HSM_STATE_GET: {
- struct md_op_data *op_data;
- struct hsm_user_state *hus;
- int rc;
-
- hus = kzalloc(sizeof(*hus), GFP_NOFS);
- if (!hus)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hus);
- if (IS_ERR(op_data)) {
- kfree(hus);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
- rc = -EFAULT;
-
- ll_finish_md_op_data(op_data);
- kfree(hus);
- return rc;
- }
- case LL_IOC_HSM_STATE_SET: {
- struct hsm_state_set *hss;
- int rc;
-
- hss = memdup_user((char __user *)arg, sizeof(*hss));
- if (IS_ERR(hss))
- return PTR_ERR(hss);
-
- rc = ll_hsm_state_set(inode, hss);
-
- kfree(hss);
- return rc;
- }
- case LL_IOC_HSM_ACTION: {
- struct md_op_data *op_data;
- struct hsm_current_action *hca;
- int rc;
-
- hca = kzalloc(sizeof(*hca), GFP_NOFS);
- if (!hca)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hca);
- if (IS_ERR(op_data)) {
- kfree(hca);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
- rc = -EFAULT;
-
- ll_finish_md_op_data(op_data);
- kfree(hca);
- return rc;
- }
- case LL_IOC_SET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle *och = NULL;
- bool lease_broken;
- fmode_t fmode;
-
- switch (arg) {
- case LL_LEASE_WRLCK:
- if (!(file->f_mode & FMODE_WRITE))
- return -EPERM;
- fmode = FMODE_WRITE;
- break;
- case LL_LEASE_RDLCK:
- if (!(file->f_mode & FMODE_READ))
- return -EPERM;
- fmode = FMODE_READ;
- break;
- case LL_LEASE_UNLCK:
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
- if (!och)
- return -ENOLCK;
-
- fmode = och->och_flags;
- rc = ll_lease_close(och, inode, &lease_broken);
- if (rc < 0)
- return rc;
-
- if (lease_broken)
- fmode = 0;
-
- return ll_lease_type_from_fmode(fmode);
- default:
- return -EINVAL;
- }
-
- CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
-
- /* apply for lease */
- och = ll_lease_open(inode, file, fmode, 0);
- if (IS_ERR(och))
- return PTR_ERR(och);
-
- rc = 0;
- mutex_lock(&lli->lli_och_mutex);
- if (!fd->fd_lease_och) {
- fd->fd_lease_och = och;
- och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (och) {
- /* impossible now that only excl is supported for now */
- ll_lease_close(och, inode, &lease_broken);
- rc = -EBUSY;
- }
- return rc;
- }
- case LL_IOC_GET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_lock *lock = NULL;
- fmode_t fmode = 0;
-
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- struct obd_client_handle *och = fd->fd_lease_och;
-
- lock = ldlm_handle2lock(&och->och_lease_handle);
- if (lock) {
- lock_res_and_lock(lock);
- if (!ldlm_is_cancel(lock))
- fmode = och->och_flags;
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
- }
- mutex_unlock(&lli->lli_och_mutex);
- return ll_lease_type_from_fmode(fmode);
- }
- case LL_IOC_HSM_IMPORT: {
- struct hsm_user_import *hui;
-
- hui = memdup_user((void __user *)arg, sizeof(*hui));
- if (IS_ERR(hui))
- return PTR_ERR(hui);
-
- rc = ll_hsm_import(inode, file, hui);
-
- kfree(hui);
- return rc;
- }
- default: {
- int err;
-
- if (ll_iocontrol_call(inode, file, cmd, arg, &err) ==
- LLIOC_STOP)
- return err;
-
- return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
- (void __user *)arg);
- }
- }
-}
-
-static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file_inode(file);
- loff_t retval, eof = 0;
-
- retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
- (origin == SEEK_CUR) ? file->f_pos : 0);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), to=%llu=%#llx(%d)\n",
- PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-
- if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
- retval = ll_glimpse_size(inode);
- if (retval != 0)
- return retval;
- eof = i_size_read(inode);
- }
-
- return generic_file_llseek_size(file, offset, origin,
- ll_file_maxbytes(inode), eof);
-}
-
-static int ll_flush(struct file *file, fl_owner_t id)
-{
- struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int rc, err;
-
- LASSERT(!S_ISDIR(inode->i_mode));
-
- /* catch async errors that were recorded back when async writeback
- * failed for pages in this mapping.
- */
- rc = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (lli->lli_clob) {
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (!rc)
- rc = err;
- }
-
- /* The application has been told about write failure already.
- * Do not report failure again.
- */
- if (fd->fd_write_failed)
- return 0;
- return rc ? -EIO : 0;
-}
-
-/**
- * Called to make sure a portion of file has been written out.
- * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
- *
- * Return how many pages have been written.
- */
-int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct cl_fsync_io *fio;
- int result;
- u16 refcheck;
-
- if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
- mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
- return -EINVAL;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
- io->ci_ignore_layout = ignore_layout;
-
- /* initialize parameters for sync */
- fio = &io->u.ci_fsync;
- fio->fi_start = start;
- fio->fi_end = end;
- fio->fi_fid = ll_inode2fid(inode);
- fio->fi_mode = mode;
- fio->fi_nr_written = 0;
-
- if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
- result = cl_io_loop(env, io);
- else
- result = io->ci_result;
- if (result == 0)
- result = fio->fi_nr_written;
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
-
- return result;
-}
-
-int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ptlrpc_request *req;
- int rc, err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
-
- rc = file_write_and_wait_range(file, start, end);
- inode_lock(inode);
-
- /* catch async errors that were recorded back when async writeback
- * failed for pages in this mapping.
- */
- if (!S_ISDIR(inode->i_mode)) {
- err = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (rc == 0)
- rc = err;
- if (lli->lli_clob) {
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (rc == 0)
- rc = err;
- }
- }
-
- err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
- if (!rc)
- rc = err;
- if (!err)
- ptlrpc_req_finished(req);
-
- if (S_ISREG(inode->i_mode)) {
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
- if (rc == 0 && err < 0)
- rc = err;
- if (rc < 0)
- fd->fd_write_failed = true;
- else
- fd->fd_write_failed = false;
- }
-
- inode_unlock(inode);
- return rc;
-}
-
-static int
-ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
-{
- struct inode *inode = file_inode(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_FLOCK,
- .ei_cb_cp = ldlm_flock_completion_ast,
- .ei_cbdata = file_lock,
- };
- struct md_op_data *op_data;
- struct lustre_handle lockh = {0};
- union ldlm_policy_data flock = { { 0 } };
- int fl_type = file_lock->fl_type;
- __u64 flags = 0;
- int rc;
- int rc2 = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID " file_lock=%p\n",
- PFID(ll_inode2fid(inode)), file_lock);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
-
- if (file_lock->fl_flags & FL_FLOCK)
- LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
- else if (!(file_lock->fl_flags & FL_POSIX))
- return -EINVAL;
-
- flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
- flock.l_flock.pid = file_lock->fl_pid;
- flock.l_flock.start = file_lock->fl_start;
- flock.l_flock.end = file_lock->fl_end;
-
- /* Somewhat ugly workaround for svc lockd.
- * lockd installs custom fl_lmops->lm_compare_owner that checks
- * for the fl_owner to be the same (which it always is on local node
- * I guess between lockd processes) and then compares pid.
- * As such we assign pid to the owner field to make it all work,
- * conflict with normal locks is unlikely since pid space and
- * pointer space for current->files are not intersecting
- */
- if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
- flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
-
- switch (fl_type) {
- case F_RDLCK:
- einfo.ei_mode = LCK_PR;
- break;
- case F_UNLCK:
- /* An unlock request may or may not have any relation to
- * existing locks so we may not be able to pass a lock handle
- * via a normal ldlm_lock_cancel() request. The request may even
- * unlock a byte range in the middle of an existing lock. In
- * order to process an unlock request we need all of the same
- * information that is given with a normal read or write record
- * lock request. To avoid creating another ldlm unlock (cancel)
- * message we'll treat a LCK_NL flock request as an unlock.
- */
- einfo.ei_mode = LCK_NL;
- break;
- case F_WRLCK:
- einfo.ei_mode = LCK_PW;
- break;
- default:
- CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
- return -ENOTSUPP;
- }
-
- switch (cmd) {
- case F_SETLKW:
-#ifdef F_SETLKW64
- case F_SETLKW64:
-#endif
- flags = 0;
- break;
- case F_SETLK:
-#ifdef F_SETLK64
- case F_SETLK64:
-#endif
- flags = LDLM_FL_BLOCK_NOWAIT;
- break;
- case F_GETLK:
-#ifdef F_GETLK64
- case F_GETLK64:
-#endif
- flags = LDLM_FL_TEST_LOCK;
- break;
- default:
- CERROR("unknown fcntl lock command: %d\n", cmd);
- return -EINVAL;
- }
-
- /*
- * Save the old mode so that if the mode in the lock changes we
- * can decrement the appropriate reader or writer refcount.
- */
- file_lock->fl_type = einfo.ei_mode;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- CDEBUG(D_DLMTRACE, "inode=" DFID ", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
- PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
- einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
-
- rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
- flags);
-
- /* Restore the file lock type if not TEST lock. */
- if (!(flags & LDLM_FL_TEST_LOCK))
- file_lock->fl_type = fl_type;
-
- if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
- !(flags & LDLM_FL_TEST_LOCK))
- rc2 = locks_lock_file_wait(file, file_lock);
-
- if (rc2 && file_lock->fl_type != F_UNLCK) {
- einfo.ei_mode = LCK_NL;
- md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
- &lockh, flags);
- rc = rc2;
- }
-
- ll_finish_md_op_data(op_data);
-
- return rc;
-}
-
-int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid,
- struct inode **inode)
-{
- struct md_op_data *op_data = NULL;
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc;
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
- rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0)
- return rc;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out_req;
- }
- if (fid)
- *fid = body->mbo_fid1;
-
- if (inode)
- rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
-out_req:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
- const char *name, int namelen)
-{
- struct ptlrpc_request *request = NULL;
- struct obd_client_handle *och = NULL;
- struct inode *child_inode = NULL;
- struct dentry *dchild = NULL;
- struct md_op_data *op_data;
- struct mdt_body *body;
- u64 data_version = 0;
- struct qstr qstr;
- int rc;
-
- CDEBUG(D_VFSTRACE, "migrate %s under " DFID " to MDT%d\n",
- name, PFID(ll_inode2fid(parent)), mdtidx);
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* Get child FID first */
- qstr.hash = full_name_hash(parent, name, namelen);
- qstr.name = name;
- qstr.len = namelen;
- dchild = d_lookup(file_dentry(file), &qstr);
- if (dchild) {
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
- if (dchild->d_inode)
- child_inode = igrab(dchild->d_inode);
- dput(dchild);
- }
-
- if (!child_inode) {
- rc = ll_get_fid_by_name(parent, name, namelen,
- &op_data->op_fid3, &child_inode);
- if (rc)
- goto out_free;
- }
-
- if (!child_inode) {
- rc = -EINVAL;
- goto out_free;
- }
-
- inode_lock(child_inode);
- op_data->op_fid3 = *ll_inode2fid(child_inode);
- if (!fid_is_sane(&op_data->op_fid3)) {
- CERROR("%s: migrate %s, but fid " DFID " is insane\n",
- ll_get_fsname(parent->i_sb, NULL, 0), name,
- PFID(&op_data->op_fid3));
- rc = -EINVAL;
- goto out_unlock;
- }
-
- rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
- if (rc < 0)
- goto out_unlock;
-
- if (rc == mdtidx) {
- CDEBUG(D_INFO, "%s: " DFID " is already on MDT%d.\n", name,
- PFID(&op_data->op_fid3), mdtidx);
- rc = 0;
- goto out_unlock;
- }
-again:
- if (S_ISREG(child_inode->i_mode)) {
- och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
- if (IS_ERR(och)) {
- rc = PTR_ERR(och);
- och = NULL;
- goto out_unlock;
- }
-
- rc = ll_data_version(child_inode, &data_version,
- LL_DV_WR_FLUSH);
- if (rc)
- goto out_close;
-
- op_data->op_handle = och->och_fh;
- op_data->op_data = och->och_mod;
- op_data->op_data_version = data_version;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_bias |= MDS_RENAME_MIGRATE;
- }
-
- op_data->op_mds = mdtidx;
- op_data->op_cli_flags = CLI_MIGRATE;
- rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
- namelen, name, namelen, &request);
- if (!rc) {
- LASSERT(request);
- ll_update_times(request, parent);
-
- body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
-
- /*
- * If the server does release layout lock, then we cleanup
- * the client och here, otherwise release it in out_close:
- */
- if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
- obd_mod_put(och->och_mod);
- md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
- och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
- och = NULL;
- }
- }
-
- if (request) {
- ptlrpc_req_finished(request);
- request = NULL;
- }
-
- /* Try again if the file layout has changed. */
- if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
- goto again;
-
-out_close:
- if (och) /* close the file */
- ll_lease_close(och, child_inode, NULL);
- if (!rc)
- clear_nlink(child_inode);
-out_unlock:
- inode_unlock(child_inode);
- iput(child_inode);
-out_free:
- ll_finish_md_op_data(op_data);
- return rc;
-}
-
-static int
-ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
-{
- return -ENOSYS;
-}
-
-/**
- * test if some locks matching bits and l_req_mode are acquired
- * - bits can be in different locks
- * - if found clear the common lock bits in *bits
- * - the bits not found, are kept in *bits
- * \param inode [IN]
- * \param bits [IN] searched lock bits [IN]
- * \param l_req_mode [IN] searched lock mode
- * \retval boolean, true iff all bits are found
- */
-int ll_have_md_lock(struct inode *inode, __u64 *bits,
- enum ldlm_mode l_req_mode)
-{
- struct lustre_handle lockh;
- union ldlm_policy_data policy;
- enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
- (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
- struct lu_fid *fid;
- __u64 flags;
- int i;
-
- if (!inode)
- return 0;
-
- fid = &ll_i2info(inode)->lli_fid;
- CDEBUG(D_INFO, "trying to match res " DFID " mode %s\n", PFID(fid),
- ldlm_lockname[mode]);
-
- flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
- for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
- policy.l_inodebits.bits = *bits & (1 << i);
- if (policy.l_inodebits.bits == 0)
- continue;
-
- if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
- &policy, mode, &lockh)) {
- struct ldlm_lock *lock;
-
- lock = ldlm_handle2lock(&lockh);
- if (lock) {
- *bits &=
- ~(lock->l_policy_data.l_inodebits.bits);
- LDLM_LOCK_PUT(lock);
- } else {
- *bits &= ~policy.l_inodebits.bits;
- }
- }
- }
- return *bits == 0;
-}
-
-enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags,
- enum ldlm_mode mode)
-{
- union ldlm_policy_data policy = { .l_inodebits = { bits } };
- struct lu_fid *fid;
-
- fid = &ll_i2info(inode)->lli_fid;
- CDEBUG(D_INFO, "trying to match res " DFID "\n", PFID(fid));
-
- return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
- fid, LDLM_IBITS, &policy, mode, lockh);
-}
-
-static int ll_inode_revalidate_fini(struct inode *inode, int rc)
-{
- /* Already unlinked. Just update nlink and return success */
- if (rc == -ENOENT) {
- clear_nlink(inode);
- /* If it is striped directory, and there is bad stripe
- * Let's revalidate the dentry again, instead of returning
- * error
- */
- if (S_ISDIR(inode->i_mode) && ll_i2info(inode)->lli_lsm_md)
- return 0;
-
- /* This path cannot be hit for regular files unless in
- * case of obscure races, so no need to validate size.
- */
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return 0;
- } else if (rc != 0) {
- CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
- "%s: revalidate FID " DFID " error: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- }
-
- return rc;
-}
-
-static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
-{
- struct inode *inode = d_inode(dentry);
- struct ptlrpc_request *req = NULL;
- struct obd_export *exp;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),name=%pd\n",
- PFID(ll_inode2fid(inode)), inode, dentry);
-
- exp = ll_i2mdexp(inode);
-
- /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
- * But under CMD case, it caused some lock issues, should be fixed
- * with new CMD ibits lock. See bug 12718
- */
- if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
- struct lookup_intent oit = { .it_op = IT_GETATTR };
- struct md_op_data *op_data;
-
- if (ibits == MDS_INODELOCK_LOOKUP)
- oit.it_op = IT_LOOKUP;
-
- /* Call getattr by fid, so do not provide name at all. */
- op_data = ll_prep_md_op_data(NULL, inode,
- inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_intent_lock(exp, op_data, &oit, &req,
- &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- rc = ll_inode_revalidate_fini(inode, rc);
- goto out;
- }
-
- rc = ll_revalidate_it_finish(req, &oit, inode);
- if (rc != 0) {
- ll_intent_release(&oit);
- goto out;
- }
-
- /* Unlinked? Unhash dentry, so it is not picked up later by
- * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
- * here to preserve get_cwd functionality on 2.6.
- * Bug 10503
- */
- if (!d_inode(dentry)->i_nlink) {
- spin_lock(&inode->i_lock);
- d_lustre_invalidate(dentry, 0);
- spin_unlock(&inode->i_lock);
- }
-
- ll_lookup_finish_locks(&oit, inode);
- } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
- struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
- u64 valid = OBD_MD_FLGETATTR;
- struct md_op_data *op_data;
- int ealen = 0;
-
- if (S_ISREG(inode->i_mode)) {
- rc = ll_get_default_mdsize(sbi, &ealen);
- if (rc)
- return rc;
- valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, ealen, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = valid;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc)
- return ll_inode_revalidate_fini(inode, rc);
-
- rc = ll_prep_inode(&inode, req, NULL, NULL);
- }
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int ll_merge_md_attr(struct inode *inode)
-{
- struct cl_attr attr = { 0 };
- int rc;
-
- LASSERT(ll_i2info(inode)->lli_lsm_md);
- rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
- &attr, ll_md_blocking_ast);
- if (rc)
- return rc;
-
- set_nlink(inode, attr.cat_nlink);
- inode->i_blocks = attr.cat_blocks;
- i_size_write(inode, attr.cat_size);
-
- ll_i2info(inode)->lli_atime = attr.cat_atime;
- ll_i2info(inode)->lli_mtime = attr.cat_mtime;
- ll_i2info(inode)->lli_ctime = attr.cat_ctime;
-
- return 0;
-}
-
-static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
-{
- struct inode *inode = d_inode(dentry);
- int rc;
-
- rc = __ll_inode_revalidate(dentry, ibits);
- if (rc != 0)
- return rc;
-
- /* if object isn't regular file, don't validate size */
- if (!S_ISREG(inode->i_mode)) {
- if (S_ISDIR(inode->i_mode) &&
- ll_i2info(inode)->lli_lsm_md) {
- rc = ll_merge_md_attr(inode);
- if (rc)
- return rc;
- }
-
- LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
- LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
- LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
- } else {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- /* In case of restore, the MDT has the right size and has
- * already send it back without granting the layout lock,
- * inode is up-to-date so glimpse is useless.
- * Also to glimpse we need the layout, in case of a running
- * restore the MDT holds the layout lock so the glimpse will
- * block up to the end of restore (getattr will block)
- */
- if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags))
- rc = ll_glimpse_size(inode);
- }
- return rc;
-}
-
-int ll_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
-{
- struct inode *inode = d_inode(path->dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int res;
-
- res = ll_inode_revalidate(path->dentry,
- MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP);
- ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
-
- if (res)
- return res;
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
-
- stat->dev = inode->i_sb->s_dev;
- if (ll_need_32bit_api(sbi))
- stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
- else
- stat->ino = inode->i_ino;
- stat->mode = inode->i_mode;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
- stat->rdev = inode->i_rdev;
- stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
- stat->blksize = 1 << inode->i_blkbits;
-
- stat->nlink = inode->i_nlink;
- stat->size = i_size_read(inode);
- stat->blocks = inode->i_blocks;
-
- return 0;
-}
-
-static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len)
-{
- int rc;
- size_t num_bytes;
- struct fiemap *fiemap;
- unsigned int extent_count = fieinfo->fi_extents_max;
-
- num_bytes = sizeof(*fiemap) + (extent_count *
- sizeof(struct fiemap_extent));
- fiemap = kvzalloc(num_bytes, GFP_KERNEL);
- if (!fiemap)
- return -ENOMEM;
-
- fiemap->fm_flags = fieinfo->fi_flags;
- fiemap->fm_extent_count = fieinfo->fi_extents_max;
- fiemap->fm_start = start;
- fiemap->fm_length = len;
-
- if (extent_count > 0 &&
- copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
- sizeof(struct fiemap_extent))) {
- rc = -EFAULT;
- goto out;
- }
-
- rc = ll_do_fiemap(inode, fiemap, num_bytes);
-
- fieinfo->fi_flags = fiemap->fm_flags;
- fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
- if (extent_count > 0 &&
- copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
- fiemap->fm_mapped_extents *
- sizeof(struct fiemap_extent))) {
- rc = -EFAULT;
- goto out;
- }
-out:
- kvfree(fiemap);
- return rc;
-}
-
-struct posix_acl *ll_get_acl(struct inode *inode, int type)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct posix_acl *acl = NULL;
-
- spin_lock(&lli->lli_lock);
- /* VFS' acl_permission_check->check_acl will release the refcount */
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- return acl;
-}
-
-int ll_inode_permission(struct inode *inode, int mask)
-{
- struct ll_sb_info *sbi;
- struct root_squash_info *squash;
- const struct cred *old_cred = NULL;
- struct cred *cred = NULL;
- bool squash_id = false;
- cfs_cap_t cap;
- int rc = 0;
-
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
- /* as root inode are NOT getting validated in lookup operation,
- * need to do it before permission check.
- */
-
- if (is_root_inode(inode)) {
- rc = __ll_inode_revalidate(inode->i_sb->s_root,
- MDS_INODELOCK_LOOKUP);
- if (rc)
- return rc;
- }
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), inode mode %x mask %o\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
-
- /* squash fsuid/fsgid if needed */
- sbi = ll_i2sbi(inode);
- squash = &sbi->ll_squash;
- if (unlikely(squash->rsi_uid &&
- uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
- !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
- squash_id = true;
- }
-
- if (squash_id) {
- CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
- __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
- squash->rsi_uid, squash->rsi_gid);
-
- /*
- * update current process's credentials
- * and FS capability
- */
- cred = prepare_creds();
- if (!cred)
- return -ENOMEM;
-
- cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
- cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
- for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
- if ((1 << cap) & CFS_CAP_FS_MASK)
- cap_lower(cred->cap_effective, cap);
- }
- old_cred = override_creds(cred);
- }
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
- rc = generic_permission(inode, mask);
-
- /* restore current process's credentials and FS capability */
- if (squash_id) {
- revert_creds(old_cred);
- put_cred(cred);
- }
-
- return rc;
-}
-
-/* -o localflock - only provides locally consistent flock locks */
-const struct file_operations ll_file_operations = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush
-};
-
-const struct file_operations ll_file_operations_flock = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush,
- .flock = ll_file_flock,
- .lock = ll_file_flock
-};
-
-/* These are for -o noflock - to return ENOSYS on flock calls */
-const struct file_operations ll_file_operations_noflock = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush,
- .flock = ll_file_noflock,
- .lock = ll_file_noflock
-};
-
-const struct inode_operations ll_file_inode_operations = {
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .fiemap = ll_fiemap,
- .get_acl = ll_get_acl,
-};
-
-/* dynamic ioctl number support routines */
-static struct llioc_ctl_data {
- struct rw_semaphore ioc_sem;
- struct list_head ioc_head;
-} llioc = {
- __RWSEM_INITIALIZER(llioc.ioc_sem),
- LIST_HEAD_INIT(llioc.ioc_head)
-};
-
-struct llioc_data {
- struct list_head iocd_list;
- unsigned int iocd_size;
- llioc_callback_t iocd_cb;
- unsigned int iocd_count;
- unsigned int iocd_cmd[0];
-};
-
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-{
- unsigned int size;
- struct llioc_data *in_data = NULL;
-
- if (!cb || !cmd || count > LLIOC_MAX_CMD || count < 0)
- return NULL;
-
- size = sizeof(*in_data) + count * sizeof(unsigned int);
- in_data = kzalloc(size, GFP_NOFS);
- if (!in_data)
- return NULL;
-
- in_data->iocd_size = size;
- in_data->iocd_cb = cb;
- in_data->iocd_count = count;
- memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-
- down_write(&llioc.ioc_sem);
- list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
- up_write(&llioc.ioc_sem);
-
- return in_data;
-}
-EXPORT_SYMBOL(ll_iocontrol_register);
-
-void ll_iocontrol_unregister(void *magic)
-{
- struct llioc_data *tmp;
-
- if (!magic)
- return;
-
- down_write(&llioc.ioc_sem);
- list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
- if (tmp == magic) {
- list_del(&tmp->iocd_list);
- up_write(&llioc.ioc_sem);
-
- kfree(tmp);
- return;
- }
- }
- up_write(&llioc.ioc_sem);
-
- CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-}
-EXPORT_SYMBOL(ll_iocontrol_unregister);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg, int *rcp)
-{
- enum llioc_iter ret = LLIOC_CONT;
- struct llioc_data *data;
- int rc = -EINVAL, i;
-
- down_read(&llioc.ioc_sem);
- list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
- for (i = 0; i < data->iocd_count; i++) {
- if (cmd != data->iocd_cmd[i])
- continue;
-
- ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
- break;
- }
-
- if (ret == LLIOC_STOP)
- break;
- }
- up_read(&llioc.ioc_sem);
-
- if (rcp)
- *rcp = rc;
- return ret;
-}
-
-int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct lu_env *env;
- int rc;
- u16 refcheck;
-
- if (!obj)
- return 0;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_conf_set(env, obj, conf);
- if (rc < 0)
- goto out;
-
- if (conf->coc_opc == OBJECT_CONF_SET) {
- struct ldlm_lock *lock = conf->coc_lock;
- struct cl_layout cl = {
- .cl_layout_gen = 0,
- };
-
- LASSERT(lock);
- LASSERT(ldlm_has_layout(lock));
-
- /* it can only be allowed to match after layout is
- * applied to inode otherwise false layout would be
- * seen. Applying layout should happen before dropping
- * the intent lock.
- */
- ldlm_lock_allow_match(lock);
-
- rc = cl_object_layout_get(env, obj, &cl);
- if (rc < 0)
- goto out;
-
- CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n",
- PFID(&lli->lli_fid), ll_layout_version_get(lli),
- cl.cl_layout_gen);
- ll_layout_version_set(lli, cl.cl_layout_gen);
- }
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-/* Fetch layout from MDT with getxattr request, if it's not ready yet */
-static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
-
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req;
- struct mdt_body *body;
- void *lvbdata;
- void *lmm;
- int lmmsize;
- int rc;
-
- CDEBUG(D_INODE, DFID " LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
- PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
- lock->l_lvb_data, lock->l_lvb_len);
-
- if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
- return 0;
-
- /* if layout lock was granted right away, the layout is returned
- * within DLM_LVB of dlm reply; otherwise if the lock was ever
- * blocked and then granted via completion ast, we have to fetch
- * layout here. Please note that we can't use the LVB buffer in
- * completion AST because it doesn't have a large enough buffer
- */
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc == 0)
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
- lmmsize, 0, &req);
- if (rc < 0)
- return rc;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- lmmsize = body->mbo_eadatasize;
- if (lmmsize == 0) /* empty layout */ {
- rc = 0;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
- if (!lmm) {
- rc = -EFAULT;
- goto out;
- }
-
- lvbdata = kvzalloc(lmmsize, GFP_NOFS);
- if (!lvbdata) {
- rc = -ENOMEM;
- goto out;
- }
-
- memcpy(lvbdata, lmm, lmmsize);
- lock_res_and_lock(lock);
- if (lock->l_lvb_data)
- kvfree(lock->l_lvb_data);
-
- lock->l_lvb_data = lvbdata;
- lock->l_lvb_len = lmmsize;
- unlock_res_and_lock(lock);
-
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-/**
- * Apply the layout to the inode. Layout lock is held and will be released
- * in this function.
- */
-static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
- struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ldlm_lock *lock;
- struct cl_object_conf conf;
- int rc = 0;
- bool lvb_ready;
- bool wait_layout = false;
-
- LASSERT(lustre_handle_is_used(lockh));
-
- lock = ldlm_handle2lock(lockh);
- LASSERT(lock);
- LASSERT(ldlm_has_layout(lock));
-
- LDLM_DEBUG(lock, "File " DFID "(%p) being reconfigured",
- PFID(&lli->lli_fid), inode);
-
- /* in case this is a caching lock and reinstate with new inode */
- md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
-
- lock_res_and_lock(lock);
- lvb_ready = ldlm_is_lvb_ready(lock);
- unlock_res_and_lock(lock);
- /* checking lvb_ready is racy but this is okay. The worst case is
- * that multi processes may configure the file on the same time.
- */
- if (lvb_ready) {
- rc = 0;
- goto out;
- }
-
- rc = ll_layout_fetch(inode, lock);
- if (rc < 0)
- goto out;
-
- /* for layout lock, lmm is returned in lock's lvb.
- * lvb_data is immutable if the lock is held so it's safe to access it
- * without res lock.
- *
- * set layout to file. Unlikely this will fail as old layout was
- * surely eliminated
- */
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_SET;
- conf.coc_inode = inode;
- conf.coc_lock = lock;
- conf.u.coc_layout.lb_buf = lock->l_lvb_data;
- conf.u.coc_layout.lb_len = lock->l_lvb_len;
- rc = ll_layout_conf(inode, &conf);
-
- /* refresh layout failed, need to wait */
- wait_layout = rc == -EBUSY;
-
-out:
- LDLM_LOCK_PUT(lock);
- ldlm_lock_decref(lockh, mode);
-
- /* wait for IO to complete if it's still being used. */
- if (wait_layout) {
- CDEBUG(D_INODE, "%s: " DFID "(%p) wait for layout reconf\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), inode);
-
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_WAIT;
- conf.coc_inode = inode;
- rc = ll_layout_conf(inode, &conf);
- if (rc == 0)
- rc = -EAGAIN;
-
- CDEBUG(D_INODE,
- "%s: file=" DFID " waiting layout return: %d.\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), rc);
- }
- return rc;
-}
-
-static int ll_layout_refresh_locked(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct lookup_intent it;
- struct lustre_handle lockh;
- enum ldlm_mode mode;
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_IBITS,
- .ei_mode = LCK_CR,
- .ei_cb_bl = &ll_md_blocking_ast,
- .ei_cb_cp = &ldlm_completion_ast,
- };
- int rc;
-
-again:
- /* mostly layout lock is caching on the local side, so try to match
- * it before grabbing layout lock mutex.
- */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
- LCK_CR | LCK_CW | LCK_PR | LCK_PW);
- if (mode != 0) { /* hit cached lock */
- rc = ll_layout_lock_set(&lockh, mode, inode);
- if (rc == -EAGAIN)
- goto again;
- return rc;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* have to enqueue one */
- memset(&it, 0, sizeof(it));
- it.it_op = IT_LAYOUT;
- lockh.cookie = 0ULL;
-
- LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file " DFID "(%p)",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), inode);
-
- rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
- ptlrpc_req_finished(it.it_request);
- it.it_request = NULL;
-
- ll_finish_md_op_data(op_data);
-
- mode = it.it_lock_mode;
- it.it_lock_mode = 0;
- ll_intent_drop_lock(&it);
-
- if (rc == 0) {
- /* set lock data in case this is a new lock */
- ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- rc = ll_layout_lock_set(&lockh, mode, inode);
- if (rc == -EAGAIN)
- goto again;
- }
-
- return rc;
-}
-
-/**
- * This function checks if there exists a LAYOUT lock on the client side,
- * or enqueues it if it doesn't have one in cache.
- *
- * This function will not hold layout lock so it may be revoked any time after
- * this function returns. Any operations depend on layout should be redone
- * in that case.
- *
- * This function should be called before lov_io_init() to get an uptodate
- * layout version, the caller should save the version number and after IO
- * is finished, this function should be called again to verify that layout
- * is not changed during IO time.
- */
-int ll_layout_refresh(struct inode *inode, __u32 *gen)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc;
-
- *gen = ll_layout_version_get(lli);
- if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
- return 0;
-
- /* sanity checks */
- LASSERT(fid_is_sane(ll_inode2fid(inode)));
- LASSERT(S_ISREG(inode->i_mode));
-
- /* take layout lock mutex to enqueue layout lock exclusively. */
- mutex_lock(&lli->lli_layout_mutex);
-
- rc = ll_layout_refresh_locked(inode);
- if (rc < 0)
- goto out;
-
- *gen = ll_layout_version_get(lli);
-out:
- mutex_unlock(&lli->lli_layout_mutex);
-
- return rc;
-}
-
-/**
- * This function send a restore request to the MDT
- */
-int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
-{
- struct hsm_user_request *hur;
- int len, rc;
-
- len = sizeof(struct hsm_user_request) +
- sizeof(struct hsm_user_item);
- hur = kzalloc(len, GFP_NOFS);
- if (!hur)
- return -ENOMEM;
-
- hur->hur_request.hr_action = HUA_RESTORE;
- hur->hur_request.hr_archive_id = 0;
- hur->hur_request.hr_flags = 0;
- memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
- sizeof(hur->hur_user_item[0].hui_fid));
- hur->hur_user_item[0].hui_extent.offset = offset;
- hur->hur_user_item[0].hui_extent.length = length;
- hur->hur_request.hr_itemcount = 1;
- rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
- len, hur, NULL);
- kfree(hur);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
deleted file mode 100644
index 3075358f3f08..000000000000
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * glimpse code shared between vvp and liblustre (and other Lustre clients in
- * the future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Oleg Drokin <oleg.drokin@sun.com>
- */
-
-#include <linux/libcfs/libcfs.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <obd.h>
-
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-
-#include <cl_object.h>
-#include "llite_internal.h"
-
-static const struct cl_lock_descr whole_file = {
- .cld_start = 0,
- .cld_end = CL_PAGE_EOF,
- .cld_mode = CLM_READ
-};
-
-/*
- * Check whether file has possible unwriten pages.
- *
- * \retval 1 file is mmap-ed or has dirty pages
- * 0 otherwise
- */
-blkcnt_t dirty_cnt(struct inode *inode)
-{
- blkcnt_t cnt = 0;
- struct vvp_object *vob = cl_inode2vvp(inode);
- void *results[1];
-
- if (inode->i_mapping)
- cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
- results, 0, 1,
- PAGECACHE_TAG_DIRTY);
- if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
- cnt = 1;
-
- return (cnt > 0) ? 1 : 0;
-}
-
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl)
-{
- const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
- struct cl_lock *lock = vvp_env_lock(env);
- struct cl_lock_descr *descr = &lock->cll_descr;
- int result = 0;
-
- CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
-
- /* NOTE: this looks like DLM lock request, but it may
- * not be one. Due to CEF_ASYNC flag (translated
- * to LDLM_FL_HAS_INTENT by osc), this is
- * glimpse request, that won't revoke any
- * conflicting DLM locks held. Instead,
- * ll_glimpse_callback() will be called on each
- * client holding a DLM lock against this file,
- * and resulting size will be returned for each
- * stripe. DLM lock on [0, EOF] is acquired only
- * if there were no conflicting locks. If there
- * were conflicting locks, enqueuing or waiting
- * fails with -ENAVAIL, but valid inode
- * attributes are returned anyway.
- */
- *descr = whole_file;
- descr->cld_obj = clob;
- descr->cld_mode = CLM_READ;
- descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
- if (agl)
- descr->cld_enq_flags |= CEF_AGL;
- /*
- * CEF_ASYNC is used because glimpse sub-locks cannot
- * deadlock (because they never conflict with other
- * locks) and, hence, can be enqueued out-of-order.
- *
- * CEF_MUST protects glimpse lock from conversion into
- * a lockless mode.
- */
- result = cl_lock_request(env, io, lock);
- if (result < 0)
- return result;
-
- if (!agl) {
- ll_merge_attr(env, inode);
- if (i_size_read(inode) > 0 && !inode->i_blocks) {
- /*
- * LU-417: Add dirty pages block count
- * lest i_blocks reports 0, some "cp" or
- * "tar" may think it's a completely
- * sparse file and skip it.
- */
- inode->i_blocks = dirty_cnt(inode);
- }
- }
-
- cl_lock_release(env, lock);
-
- return result;
-}
-
-static int cl_io_get(struct inode *inode, struct lu_env **envout,
- struct cl_io **ioout, u16 *refcheck)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- int result;
-
- if (S_ISREG(inode->i_mode)) {
- env = cl_env_get(refcheck);
- if (!IS_ERR(env)) {
- io = vvp_env_thread_io(env);
- io->ci_obj = clob;
- *envout = env;
- *ioout = io;
- result = 1;
- } else {
- result = PTR_ERR(env);
- }
- } else {
- result = 0;
- }
- return result;
-}
-
-int cl_glimpse_size0(struct inode *inode, int agl)
-{
- /*
- * We don't need ast_flags argument to cl_glimpse_size(), because
- * osc_lock_enqueue() takes care of the possible deadlock that said
- * argument was introduced to avoid.
- */
- /*
- * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
- * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
- * blocking anyway.
- */
- struct lu_env *env = NULL;
- struct cl_io *io = NULL;
- int result;
- u16 refcheck;
-
- result = cl_io_get(inode, &env, &io, &refcheck);
- if (result > 0) {
-again:
- io->ci_verify_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result > 0)
- /*
- * nothing to do for this io. This currently happens
- * when stripe sub-object's are not yet created.
- */
- result = io->ci_result;
- else if (result == 0)
- result = cl_glimpse_lock(env, io, inode, io->ci_obj,
- agl);
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
- cl_env_put(env, &refcheck);
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
deleted file mode 100644
index df5c0c0ae703..000000000000
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ /dev/null
@@ -1,293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/rbtree.h>
-
-#include <obd.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <cl_object.h>
-
-#include "llite_internal.h"
-
-/*
- * ccc_ prefix stands for "Common Client Code".
- */
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-/**
- * An `emergency' environment used by cl_inode_fini() when cl_env_get()
- * fails. Access to this environment is serialized by cl_inode_fini_guard
- * mutex.
- */
-struct lu_env *cl_inode_fini_env;
-u16 cl_inode_fini_refcheck;
-
-/**
- * A mutex serializing calls to slp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-static DEFINE_MUTEX(cl_inode_fini_guard);
-
-int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
- unsigned int attr_flags)
-{
- struct lu_env *env;
- struct cl_io *io;
- int result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
- io->ci_verify_layout = 1;
-
- io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
- io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
- io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
- io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
- io->u.ci_setattr.sa_attr_flags = attr_flags;
- io->u.ci_setattr.sa_valid = attr->ia_valid;
- io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu);
-
-again:
- if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
- struct vvp_io *vio = vvp_env_io(env);
-
- if (attr->ia_valid & ATTR_FILE)
- /* populate the file descriptor for ftruncate to honor
- * group lock - see LU-787
- */
- vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
-
- result = cl_io_loop(env, io);
- } else {
- result = io->ci_result;
- }
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
-
- cl_env_put(env, &refcheck);
- return result;
-}
-
-/**
- * Initialize or update CLIO structures for regular files when new
- * meta-data arrives from the server.
- *
- * \param inode regular file inode
- * \param md new file metadata from MDS
- * - allocates cl_object if necessary,
- * - updated layout, if object was already here.
- */
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
-{
- struct lu_env *env;
- struct ll_inode_info *lli;
- struct cl_object *clob;
- struct lu_site *site;
- struct lu_fid *fid;
- struct cl_object_conf conf = {
- .coc_inode = inode,
- .u = {
- .coc_layout = md->layout,
- }
- };
- int result = 0;
- u16 refcheck;
-
- LASSERT(md->body->mbo_valid & OBD_MD_FLID);
- LASSERT(S_ISREG(inode->i_mode));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- site = ll_i2sbi(inode)->ll_site;
- lli = ll_i2info(inode);
- fid = &lli->lli_fid;
- LASSERT(fid_is_sane(fid));
-
- if (!lli->lli_clob) {
- /* clob is slave of inode, empty lli_clob means for new inode,
- * there is no clob in cache with the given fid, so it is
- * unnecessary to perform lookup-alloc-lookup-insert, just
- * alloc and insert directly.
- */
- LASSERT(inode->i_state & I_NEW);
- conf.coc_lu.loc_flags = LOC_F_NEW;
- clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
- fid, &conf);
- if (!IS_ERR(clob)) {
- /*
- * No locking is necessary, as new inode is
- * locked by I_NEW bit.
- */
- lli->lli_clob = clob;
- lu_object_ref_add(&clob->co_lu, "inode", inode);
- } else {
- result = PTR_ERR(clob);
- }
- } else {
- result = cl_conf_set(env, lli->lli_clob, &conf);
- }
-
- cl_env_put(env, &refcheck);
-
- if (result != 0)
- CERROR("Failure to initialize cl object " DFID ": %d\n",
- PFID(fid), result);
- return result;
-}
-
-/**
- * Wait for others drop their references of the object at first, then we drop
- * the last one, which will lead to the object be destroyed immediately.
- * Must be called after cl_object_kill() against this object.
- *
- * The reason we want to do this is: destroying top object will wait for sub
- * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
- * to initiate top object destroying which may deadlock. See bz22520.
- */
-static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
-{
- struct lu_object_header *header = obj->co_lu.lo_header;
- wait_queue_entry_t waiter;
-
- if (unlikely(atomic_read(&header->loh_ref) != 1)) {
- struct lu_site *site = obj->co_lu.lo_dev->ld_site;
- struct lu_site_bkt_data *bkt;
-
- bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
-
- init_waitqueue_entry(&waiter, current);
- add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&header->loh_ref) == 1)
- break;
- schedule();
- }
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
- }
-
- cl_object_put(env, obj);
-}
-
-void cl_inode_fini(struct inode *inode)
-{
- struct lu_env *env;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- u16 refcheck;
- int emergency;
-
- if (clob) {
- env = cl_env_get(&refcheck);
- emergency = IS_ERR(env);
- if (emergency) {
- mutex_lock(&cl_inode_fini_guard);
- LASSERT(cl_inode_fini_env);
- env = cl_inode_fini_env;
- }
- /*
- * cl_object cache is a slave to inode cache (which, in turn
- * is a slave to dentry cache), don't keep cl_object in memory
- * when its master is evicted.
- */
- cl_object_kill(env, clob);
- lu_object_ref_del(&clob->co_lu, "inode", inode);
- cl_object_put_last(env, clob);
- lli->lli_clob = NULL;
- if (emergency)
- mutex_unlock(&cl_inode_fini_guard);
- else
- cl_env_put(env, &refcheck);
- }
-}
-
-/**
- * build inode number from passed @fid
- */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-{
- if (BITS_PER_LONG == 32 || api32)
- return fid_flatten32(fid);
- else
- return fid_flatten(fid);
-}
-
-/**
- * build inode generation from passed @fid. If our FID overflows the 32-bit
- * inode number then return a non-zero generation to distinguish them.
- */
-__u32 cl_fid_build_gen(const struct lu_fid *fid)
-{
- __u32 gen;
-
- if (fid_is_igif(fid)) {
- gen = lu_igif_gen(fid);
- return gen;
- }
-
- gen = fid_flatten(fid) >> 32;
- return gen;
-}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
deleted file mode 100644
index a246b955306e..000000000000
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ /dev/null
@@ -1,186 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- */
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <obd_class.h>
-#include <obd_support.h>
-#include <obd.h>
-#include <cl_object.h>
-
-#include "llite_internal.h"
-
-/* Initialize the default and maximum LOV EA and cookie sizes. This allows
- * us to make MDS RPCs with large enough reply buffers to hold the
- * maximum-sized (= maximum striped) EA and cookie without having to
- * calculate this (via a call into the LOV + OSCs) each time we make an RPC.
- */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
-{
- u32 val_size, max_easize, def_easize;
- int rc;
-
- val_size = sizeof(max_easize);
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE,
- &val_size, &max_easize);
- if (rc)
- return rc;
-
- val_size = sizeof(def_easize);
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE, &val_size, &def_easize);
- if (rc)
- return rc;
-
- /*
- * default cookiesize is 0 because from 2.4 server doesn't send
- * llog cookies to client.
- */
- CDEBUG(D_HA, "updating def/max_easize: %d/%d\n",
- def_easize, max_easize);
-
- rc = md_init_ea_size(md_exp, max_easize, def_easize);
- return rc;
-}
-
-/**
- * This function is used as an upcall-callback hooked by liblustre and llite
- * clients into obd_notify() listeners chain to handle notifications about
- * change of import connect_flags. See llu_fsswop_mount() and
- * lustre_common_fill_super().
- */
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data)
-{
- struct lustre_client_ocd *lco;
- struct client_obd *cli;
- __u64 flags;
- int result;
-
- if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) &&
- watched->obd_set_up && !watched->obd_stopping) {
- cli = &watched->u.cli;
- lco = owner;
- flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
- CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n",
- lco->lco_flags, flags);
- mutex_lock(&lco->lco_lock);
- lco->lco_flags &= flags;
- /* for each osc event update ea size */
- if (lco->lco_dt_exp)
- cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
-
- mutex_unlock(&lco->lco_lock);
- result = 0;
- } else {
- CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n",
- watched->obd_type->typ_name,
- watched->obd_name, watched->obd_set_up,
- watched->obd_stopping);
- result = -EINVAL;
- }
- return result;
-}
-
-#define GROUPLOCK_SCOPE "grouplock"
-
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ll_grouplock *cg)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct cl_lock *lock;
- struct cl_lock_descr *descr;
- __u32 enqflags;
- u16 refcheck;
- int rc;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
-
- rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (rc != 0) {
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- /* Does not make sense to take GL for released layout */
- if (rc > 0)
- rc = -ENOTSUPP;
- return rc;
- }
-
- lock = vvp_env_lock(env);
- descr = &lock->cll_descr;
- descr->cld_obj = obj;
- descr->cld_start = 0;
- descr->cld_end = CL_PAGE_EOF;
- descr->cld_gid = gid;
- descr->cld_mode = CLM_GROUP;
-
- enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
- descr->cld_enq_flags = enqflags;
-
- rc = cl_lock_request(env, io, lock);
- if (rc < 0) {
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- return rc;
- }
-
- cg->lg_env = env;
- cg->lg_io = io;
- cg->lg_lock = lock;
- cg->lg_gid = gid;
-
- return 0;
-}
-
-void cl_put_grouplock(struct ll_grouplock *cg)
-{
- struct lu_env *env = cg->lg_env;
- struct cl_io *io = cg->lg_io;
- struct cl_lock *lock = cg->lg_lock;
-
- LASSERT(cg->lg_env);
- LASSERT(cg->lg_gid);
-
- cl_lock_release(env, lock);
- cl_io_fini(env, io);
- cl_env_put(env, NULL);
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
deleted file mode 100644
index d46bcf71b273..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ /dev/null
@@ -1,1337 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef LLITE_INTERNAL_H
-#define LLITE_INTERNAL_H
-#include <lustre_debug.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <lustre_disk.h> /* for s2sbi */
-#include <lustre_linkea.h>
-
-/* for struct cl_lock_descr and struct cl_io */
-#include <lustre_patchless_compat.h>
-#include <lustre_compat.h>
-#include <cl_object.h>
-#include <lustre_lmv.h>
-#include <lustre_mdc.h>
-#include <lustre_intent.h>
-#include <linux/compat.h>
-#include <linux/namei.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-#include "vvp_internal.h"
-#include "range_lock.h"
-
-#ifndef FMODE_EXEC
-#define FMODE_EXEC 0
-#endif
-
-#ifndef VM_FAULT_RETRY
-#define VM_FAULT_RETRY 0
-#endif
-
-/** Only used on client-side for indicating the tail of dir hash/offset. */
-#define LL_DIR_END_OFF 0x7fffffffffffffffULL
-#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
-
-/* 4UL * 1024 * 1024 */
-#define LL_MAX_BLKSIZE_BITS 22
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-
-struct ll_dentry_data {
- struct lookup_intent *lld_it;
- unsigned int lld_sa_generation;
- unsigned int lld_invalid:1;
- unsigned int lld_nfs_dentry:1;
- struct rcu_head lld_rcu_head;
-};
-
-#define ll_d2d(de) ((struct ll_dentry_data *)((de)->d_fsdata))
-
-#define LLI_INODE_MAGIC 0x111d0de5
-#define LLI_INODE_DEAD 0xdeadd00d
-
-struct ll_getname_data {
- struct dir_context ctx;
- char *lgd_name; /* points to buffer with NAME_MAX+1 size */
- struct lu_fid lgd_fid; /* target fid we are looking for */
- int lgd_found; /* inode matched? */
-};
-
-struct ll_grouplock {
- struct lu_env *lg_env;
- struct cl_io *lg_io;
- struct cl_lock *lg_lock;
- unsigned long lg_gid;
-};
-
-enum ll_file_flags {
- /* File data is modified. */
- LLIF_DATA_MODIFIED = 0,
- /* File is being restored */
- LLIF_FILE_RESTORING = 1,
- /* Xattr cache is attached to the file */
- LLIF_XATTR_CACHE = 2,
-};
-
-struct ll_inode_info {
- __u32 lli_inode_magic;
-
- spinlock_t lli_lock;
- unsigned long lli_flags;
- struct posix_acl *lli_posix_acl;
-
- /* identifying fields for both metadata and data stacks. */
- struct lu_fid lli_fid;
- /* master inode fid for stripe directory */
- struct lu_fid lli_pfid;
-
- /* We need all three because every inode may be opened in different
- * modes
- */
- struct obd_client_handle *lli_mds_read_och;
- struct obd_client_handle *lli_mds_write_och;
- struct obd_client_handle *lli_mds_exec_och;
- __u64 lli_open_fd_read_count;
- __u64 lli_open_fd_write_count;
- __u64 lli_open_fd_exec_count;
- /* Protects access to och pointers and their usage counters */
- struct mutex lli_och_mutex;
-
- struct inode lli_vfs_inode;
-
- /* the most recent timestamps obtained from mds */
- s64 lli_atime;
- s64 lli_mtime;
- s64 lli_ctime;
- spinlock_t lli_agl_lock;
-
- /* Try to make the d::member and f::member are aligned. Before using
- * these members, make clear whether it is directory or not.
- */
- union {
- /* for directory */
- struct {
- /* serialize normal readdir and statahead-readdir. */
- struct mutex lli_readdir_mutex;
-
- /* metadata statahead */
- /* since parent-child threads can share the same @file
- * struct, "opendir_key" is the token when dir close for
- * case of parent exit before child -- it is me should
- * cleanup the dir readahead.
- */
- void *lli_opendir_key;
- struct ll_statahead_info *lli_sai;
- /* protect statahead stuff. */
- spinlock_t lli_sa_lock;
- /* "opendir_pid" is the token when lookup/revalidate
- * -- I am the owner of dir statahead.
- */
- pid_t lli_opendir_pid;
- /* stat will try to access statahead entries or start
- * statahead if this flag is set, and this flag will be
- * set upon dir open, and cleared when dir is closed,
- * statahead hit ratio is too low, or start statahead
- * thread failed.
- */
- unsigned int lli_sa_enabled:1;
- /* generation for statahead */
- unsigned int lli_sa_generation;
- /* directory stripe information */
- struct lmv_stripe_md *lli_lsm_md;
- /* default directory stripe offset. This is extracted
- * from the "dmv" xattr in order to decide which MDT to
- * create a subdirectory on. The MDS itself fetches
- * "dmv" and gets the rest of the default layout itself
- * (count, hash, etc).
- */
- __u32 lli_def_stripe_offset;
- };
-
- /* for non-directory */
- struct {
- struct mutex lli_size_mutex;
- char *lli_symlink_name;
- /*
- * struct rw_semaphore {
- * signed long count; // align d.d_def_acl
- * spinlock_t wait_lock; // align d.d_sa_lock
- * struct list_head wait_list;
- * }
- */
- struct rw_semaphore lli_trunc_sem;
- struct range_lock_tree lli_write_tree;
-
- struct rw_semaphore lli_glimpse_sem;
- unsigned long lli_glimpse_time;
- struct list_head lli_agl_list;
- __u64 lli_agl_index;
-
- /* for writepage() only to communicate to fsync */
- int lli_async_rc;
-
- /*
- * whenever a process try to read/write the file, the
- * jobid of the process will be saved here, and it'll
- * be packed into the write PRC when flush later.
- *
- * so the read/write statistics for jobid will not be
- * accurate if the file is shared by different jobs.
- */
- char lli_jobid[LUSTRE_JOBID_SIZE];
- };
- };
-
- /* XXX: For following frequent used members, although they maybe special
- * used for non-directory object, it is some time-wasting to check
- * whether the object is directory or not before using them. On the
- * other hand, currently, sizeof(f) > sizeof(d), it cannot reduce
- * the "ll_inode_info" size even if moving those members into u.f.
- * So keep them out side.
- *
- * In the future, if more members are added only for directory,
- * some of the following members can be moved into u.f.
- */
- struct cl_object *lli_clob;
-
- /* mutex to request for layout lock exclusively. */
- struct mutex lli_layout_mutex;
- /* Layout version, protected by lli_layout_lock */
- __u32 lli_layout_gen;
- spinlock_t lli_layout_lock;
-
- struct rw_semaphore lli_xattrs_list_rwsem;
- struct mutex lli_xattrs_enq_lock;
- struct list_head lli_xattrs;/* ll_xattr_entry->xe_list */
-};
-
-static inline __u32 ll_layout_version_get(struct ll_inode_info *lli)
-{
- __u32 gen;
-
- spin_lock(&lli->lli_layout_lock);
- gen = lli->lli_layout_gen;
- spin_unlock(&lli->lli_layout_lock);
-
- return gen;
-}
-
-static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen)
-{
- spin_lock(&lli->lli_layout_lock);
- lli->lli_layout_gen = gen;
- spin_unlock(&lli->lli_layout_lock);
-}
-
-int ll_xattr_cache_destroy(struct inode *inode);
-
-int ll_xattr_cache_get(struct inode *inode, const char *name,
- char *buffer, size_t size, __u64 valid);
-
-int ll_init_security(struct dentry *dentry, struct inode *inode,
- struct inode *dir);
-
-/*
- * Locking to guarantee consistency of non-atomic updates to long long i_size,
- * consistency between file size and KMS.
- *
- * Implemented by ->lli_size_mutex and ->lsm_lock, nested in that order.
- */
-
-void ll_inode_size_lock(struct inode *inode);
-void ll_inode_size_unlock(struct inode *inode);
-
-/* FIXME: replace the name of this with LL_I to conform to kernel stuff */
-/* static inline struct ll_inode_info *LL_I(struct inode *inode) */
-static inline struct ll_inode_info *ll_i2info(struct inode *inode)
-{
- return container_of(inode, struct ll_inode_info, lli_vfs_inode);
-}
-
-/* default to about 64M of readahead on a given system. */
-#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT))
-
-/* default to read-ahead full files smaller than 2MB on the second read */
-#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
-
-enum ra_stat {
- RA_STAT_HIT = 0,
- RA_STAT_MISS,
- RA_STAT_DISTANT_READPAGE,
- RA_STAT_MISS_IN_WINDOW,
- RA_STAT_FAILED_GRAB_PAGE,
- RA_STAT_FAILED_MATCH,
- RA_STAT_DISCARDED,
- RA_STAT_ZERO_LEN,
- RA_STAT_ZERO_WINDOW,
- RA_STAT_EOF,
- RA_STAT_MAX_IN_FLIGHT,
- RA_STAT_WRONG_GRAB_PAGE,
- RA_STAT_FAILED_REACH_END,
- _NR_RA_STAT,
-};
-
-struct ll_ra_info {
- atomic_t ra_cur_pages;
- unsigned long ra_max_pages;
- unsigned long ra_max_pages_per_file;
- unsigned long ra_max_read_ahead_whole_pages;
-};
-
-/* ra_io_arg will be filled in the beginning of ll_readahead with
- * ras_lock, then the following ll_read_ahead_pages will read RA
- * pages according to this arg, all the items in this structure are
- * counted by page index.
- */
-struct ra_io_arg {
- unsigned long ria_start; /* start offset of read-ahead*/
- unsigned long ria_end; /* end offset of read-ahead*/
- unsigned long ria_reserved; /* reserved pages for read-ahead */
- unsigned long ria_end_min; /* minimum end to cover current read */
- bool ria_eof; /* reach end of file */
- /* If stride read pattern is detected, ria_stoff means where
- * stride read is started. Note: for normal read-ahead, the
- * value here is meaningless, and also it will not be accessed
- */
- pgoff_t ria_stoff;
- /* ria_length and ria_pages are the length and pages length in the
- * stride I/O mode. And they will also be used to check whether
- * it is stride I/O read-ahead in the read-ahead pages
- */
- unsigned long ria_length;
- unsigned long ria_pages;
-};
-
-/* LL_HIST_MAX=32 causes an overflow */
-#define LL_HIST_MAX 28
-#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
-#define LL_PROCESS_HIST_MAX 10
-struct per_process_info {
- pid_t pid;
- struct obd_histogram pp_r_hist;
- struct obd_histogram pp_w_hist;
-};
-
-/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
-struct ll_rw_extents_info {
- struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
-};
-
-#define LL_OFFSET_HIST_MAX 100
-struct ll_rw_process_info {
- pid_t rw_pid;
- int rw_op;
- loff_t rw_range_start;
- loff_t rw_range_end;
- loff_t rw_last_file_pos;
- loff_t rw_offset;
- size_t rw_smallest_extent;
- size_t rw_largest_extent;
- struct ll_file_data *rw_last_file;
-};
-
-enum stats_track_type {
- STATS_TRACK_ALL = 0, /* track all processes */
- STATS_TRACK_PID, /* track process with this pid */
- STATS_TRACK_PPID, /* track processes with this ppid */
- STATS_TRACK_GID, /* track processes with this gid */
- STATS_TRACK_LAST,
-};
-
-/* flags for sbi->ll_flags */
-#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
-#define LL_SBI_FLOCK 0x04
-#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-#define LL_SBI_ACL 0x10 /* support ACL */
-/* LL_SBI_RMT_CLIENT 0x40 remote client */
-#define LL_SBI_MDS_CAPA 0x80 /* support mds capa, obsolete */
-#define LL_SBI_OSS_CAPA 0x100 /* support oss capa, obsolete */
-#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
-#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
-#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
-/* LL_SBI_SOM_PREVIEW 0x1000 SOM preview mount option, obsolete */
-#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
-#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
-#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
-#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
-#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
-#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
-#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */
-#define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */
-#define LL_SBI_ALWAYS_PING 0x200000 /* always ping even if server
- * suppress_pings
- */
-
-#define LL_SBI_FLAGS { \
- "nolck", \
- "checksum", \
- "flock", \
- "user_xattr", \
- "acl", \
- "???", \
- "???", \
- "mds_capa", \
- "oss_capa", \
- "flock", \
- "lru_resize", \
- "lazy_statfs", \
- "som", \
- "32bit_api", \
- "64bit_hash", \
- "agl", \
- "verbose", \
- "layout", \
- "user_fid2path",\
- "xattr_cache", \
- "norootsquash", \
- "always_ping", \
-}
-
-/*
- * This is embedded into llite super-blocks to keep track of connect
- * flags (capabilities) supported by all imports given mount is
- * connected to.
- */
-struct lustre_client_ocd {
- /*
- * This is conjunction of connect_flags across all imports
- * (LOVs) this mount is connected to. This field is updated by
- * cl_ocd_update() under ->lco_lock.
- */
- __u64 lco_flags;
- struct mutex lco_lock;
- struct obd_export *lco_md_exp;
- struct obd_export *lco_dt_exp;
-};
-
-struct ll_sb_info {
- /* this protects pglist and ra_info. It isn't safe to
- * grab from interrupt contexts
- */
- spinlock_t ll_lock;
- spinlock_t ll_pp_extent_lock; /* pp_extent entry*/
- spinlock_t ll_process_lock; /* ll_rw_process_info */
- struct obd_uuid ll_sb_uuid;
- struct obd_export *ll_md_exp;
- struct obd_export *ll_dt_exp;
- struct dentry *ll_debugfs_entry;
- struct lu_fid ll_root_fid; /* root object fid */
-
- int ll_flags;
- unsigned int ll_umounting:1,
- ll_xattr_cache_enabled:1,
- ll_client_common_fill_super_succeeded:1;
-
- struct lustre_client_ocd ll_lco;
-
- struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
-
- /*
- * Used to track "unstable" pages on a client, and maintain a
- * LRU list of clean pages. An "unstable" page is defined as
- * any page which is sent to a server as part of a bulk request,
- * but is uncommitted to stable storage.
- */
- struct cl_client_cache *ll_cache;
-
- struct lprocfs_stats *ll_ra_stats;
-
- struct ll_ra_info ll_ra_info;
- unsigned int ll_namelen;
- const struct file_operations *ll_fop;
-
- unsigned int ll_md_brw_pages; /* readdir pages per RPC */
-
- struct lu_site *ll_site;
- struct cl_device *ll_cl;
- /* Statistics */
- struct ll_rw_extents_info ll_rw_extents_info;
- int ll_extent_process_count;
- struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
- unsigned int ll_offset_process_count;
- struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
- unsigned int ll_rw_offset_entry_count;
- int ll_stats_track_id;
- enum stats_track_type ll_stats_track_type;
- int ll_rw_stats_on;
-
- /* metadata stat-ahead */
- unsigned int ll_sa_max; /* max statahead RPCs */
- atomic_t ll_sa_total; /* statahead thread started
- * count
- */
- atomic_t ll_sa_wrong; /* statahead thread stopped for
- * low hit ratio
- */
- atomic_t ll_sa_running; /* running statahead thread
- * count
- */
- atomic_t ll_agl_total; /* AGL thread started count */
-
- dev_t ll_sdev_orig; /* save s_dev before assign for
- * clustered nfs
- */
- /* root squash */
- struct root_squash_info ll_squash;
- struct path ll_mnt;
-
- __kernel_fsid_t ll_fsid;
- struct kobject ll_kobj; /* sysfs object */
- struct super_block *ll_sb; /* struct super_block (for sysfs code)*/
- struct completion ll_kobj_unregister;
-};
-
-/*
- * per file-descriptor read-ahead data.
- */
-struct ll_readahead_state {
- spinlock_t ras_lock;
- /*
- * index of the last page that read(2) needed and that wasn't in the
- * cache. Used by ras_update() to detect seeks.
- *
- * XXX nikita: if access seeks into cached region, Lustre doesn't see
- * this.
- */
- unsigned long ras_last_readpage;
- /*
- * number of pages read after last read-ahead window reset. As window
- * is reset on each seek, this is effectively a number of consecutive
- * accesses. Maybe ->ras_accessed_in_window is better name.
- *
- * XXX nikita: window is also reset (by ras_update()) when Lustre
- * believes that memory pressure evicts read-ahead pages. In that
- * case, it probably doesn't make sense to expand window to
- * PTLRPC_MAX_BRW_PAGES on the third access.
- */
- unsigned long ras_consecutive_pages;
- /*
- * number of read requests after the last read-ahead window reset
- * As window is reset on each seek, this is effectively the number
- * on consecutive read request and is used to trigger read-ahead.
- */
- unsigned long ras_consecutive_requests;
- /*
- * Parameters of current read-ahead window. Handled by
- * ras_update(). On the initial access to the file or after a seek,
- * window is reset to 0. After 3 consecutive accesses, window is
- * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
- * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
- */
- unsigned long ras_window_start, ras_window_len;
- /*
- * Optimal RPC size. It decides how many pages will be sent
- * for each read-ahead.
- */
- unsigned long ras_rpc_size;
- /*
- * Where next read-ahead should start at. This lies within read-ahead
- * window. Read-ahead window is read in pieces rather than at once
- * because: 1. lustre limits total number of pages under read-ahead by
- * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
- * not covered by DLM lock.
- */
- unsigned long ras_next_readahead;
- /*
- * Total number of ll_file_read requests issued, reads originating
- * due to mmap are not counted in this total. This value is used to
- * trigger full file read-ahead after multiple reads to a small file.
- */
- unsigned long ras_requests;
- /*
- * Page index with respect to the current request, these value
- * will not be accurate when dealing with reads issued via mmap.
- */
- unsigned long ras_request_index;
- /*
- * The following 3 items are used for detecting the stride I/O
- * mode.
- * In stride I/O mode,
- * ...............|-----data-----|****gap*****|--------|******|....
- * offset |-stride_pages-|-stride_gap-|
- * ras_stride_offset = offset;
- * ras_stride_length = stride_pages + stride_gap;
- * ras_stride_pages = stride_pages;
- * Note: all these three items are counted by pages.
- */
- unsigned long ras_stride_length;
- unsigned long ras_stride_pages;
- pgoff_t ras_stride_offset;
- /*
- * number of consecutive stride request count, and it is similar as
- * ras_consecutive_requests, but used for stride I/O mode.
- * Note: only more than 2 consecutive stride request are detected,
- * stride read-ahead will be enable
- */
- unsigned long ras_consecutive_stride_requests;
-};
-
-extern struct kmem_cache *ll_file_data_slab;
-struct lustre_handle;
-struct ll_file_data {
- struct ll_readahead_state fd_ras;
- struct ll_grouplock fd_grouplock;
- __u64 lfd_pos;
- __u32 fd_flags;
- fmode_t fd_omode;
- /* openhandle if lease exists for this file.
- * Borrow lli->lli_och_mutex to protect assignment
- */
- struct obd_client_handle *fd_lease_och;
- struct obd_client_handle *fd_och;
- struct file *fd_file;
- /* Indicate whether need to report failure when close.
- * true: failure is known, not report again.
- * false: unknown failure, should report.
- */
- bool fd_write_failed;
- rwlock_t fd_lock; /* protect lcc list */
- struct list_head fd_lccs; /* list of ll_cl_context */
-};
-
-extern struct dentry *llite_root;
-extern struct kset *llite_kset;
-
-static inline struct inode *ll_info2i(struct ll_inode_info *lli)
-{
- return &lli->lli_vfs_inode;
-}
-
-__u32 ll_i2suppgid(struct inode *i);
-void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2);
-
-static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
-{
-#if BITS_PER_LONG == 32
- return 1;
-#elif defined(CONFIG_COMPAT)
- return unlikely(in_compat_syscall() ||
- (sbi->ll_flags & LL_SBI_32BIT_API));
-#else
- return unlikely(sbi->ll_flags & LL_SBI_32BIT_API);
-#endif
-}
-
-void ll_ras_enter(struct file *f);
-
-/* llite/lcommon_misc.c */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ll_grouplock *cg);
-void cl_put_grouplock(struct ll_grouplock *cg);
-
-/* llite/lproc_llite.c */
-int ldebugfs_register_mountpoint(struct dentry *parent,
- struct super_block *sb, char *osc, char *mdc);
-void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi);
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
-void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw);
-
-enum {
- LPROC_LL_DIRTY_HITS,
- LPROC_LL_DIRTY_MISSES,
- LPROC_LL_READ_BYTES,
- LPROC_LL_WRITE_BYTES,
- LPROC_LL_BRW_READ,
- LPROC_LL_BRW_WRITE,
- LPROC_LL_IOCTL,
- LPROC_LL_OPEN,
- LPROC_LL_RELEASE,
- LPROC_LL_MAP,
- LPROC_LL_LLSEEK,
- LPROC_LL_FSYNC,
- LPROC_LL_READDIR,
- LPROC_LL_SETATTR,
- LPROC_LL_TRUNC,
- LPROC_LL_FLOCK,
- LPROC_LL_GETATTR,
- LPROC_LL_CREATE,
- LPROC_LL_LINK,
- LPROC_LL_UNLINK,
- LPROC_LL_SYMLINK,
- LPROC_LL_MKDIR,
- LPROC_LL_RMDIR,
- LPROC_LL_MKNOD,
- LPROC_LL_RENAME,
- LPROC_LL_STAFS,
- LPROC_LL_ALLOC_INODE,
- LPROC_LL_SETXATTR,
- LPROC_LL_GETXATTR,
- LPROC_LL_GETXATTR_HITS,
- LPROC_LL_LISTXATTR,
- LPROC_LL_REMOVEXATTR,
- LPROC_LL_INODE_PERM,
- LPROC_LL_FILE_OPCODES
-};
-
-/* llite/dir.c */
-extern const struct file_operations ll_dir_operations;
-extern const struct inode_operations ll_dir_inode_operations;
-int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
- struct dir_context *ctx);
-int ll_get_mdt_idx(struct inode *inode);
-int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid);
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
- __u64 offset);
-void ll_release_page(struct inode *inode, struct page *page, bool remove);
-
-/* llite/namei.c */
-extern const struct inode_operations ll_special_inode_operations;
-
-struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct lustre_md *lic);
-int ll_test_inode_by_fid(struct inode *inode, void *opaque);
-int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag);
-struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
-void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
-
-/* llite/rw.c */
-int ll_writepage(struct page *page, struct writeback_control *wbc);
-int ll_writepages(struct address_space *mapping, struct writeback_control *wbc);
-int ll_readpage(struct file *file, struct page *page);
-void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
-struct ll_cl_context *ll_cl_find(struct file *file);
-void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
-void ll_cl_remove(struct file *file, const struct lu_env *env);
-
-extern const struct address_space_operations ll_aops;
-
-/* llite/file.c */
-extern const struct file_operations ll_file_operations;
-extern const struct file_operations ll_file_operations_flock;
-extern const struct file_operations ll_file_operations_noflock;
-extern const struct inode_operations ll_file_inode_operations;
-int ll_have_md_lock(struct inode *inode, __u64 *bits,
- enum ldlm_mode l_req_mode);
-enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags,
- enum ldlm_mode mode);
-int ll_file_open(struct inode *inode, struct file *file);
-int ll_file_release(struct inode *inode, struct file *file);
-int ll_release_openhandle(struct inode *inode, struct lookup_intent *it);
-int ll_md_real_close(struct inode *inode, fmode_t fmode);
-int ll_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-struct posix_acl *ll_get_acl(struct inode *inode, int type);
-int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
- const char *name, int namelen);
-int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid, struct inode **inode);
-int ll_inode_permission(struct inode *inode, int mask);
-
-int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- __u64 flags, struct lov_user_md *lum,
- int lum_size);
-int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
- struct lov_mds_md **lmm, int *lmm_size,
- struct ptlrpc_request **request);
-int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
- int set_default);
-int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size,
- struct ptlrpc_request **request, u64 valid);
-int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_attr(const struct lu_env *env, struct inode *inode);
-int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
-int ll_hsm_release(struct inode *inode);
-int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss);
-
-/* llite/dcache.c */
-
-extern const struct dentry_operations ll_d_ops;
-void ll_intent_drop_lock(struct lookup_intent *it);
-void ll_intent_release(struct lookup_intent *it);
-void ll_invalidate_aliases(struct inode *inode);
-void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode);
-int ll_revalidate_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it, struct inode *inode);
-
-/* llite/llite_lib.c */
-extern struct super_operations lustre_super_operations;
-
-void ll_lli_init(struct ll_inode_info *lli);
-int ll_fill_super(struct super_block *sb);
-void ll_put_super(struct super_block *sb);
-void ll_kill_super(struct super_block *sb);
-struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
-void ll_dir_clear_lsm_md(struct inode *inode);
-void ll_clear_inode(struct inode *inode);
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import);
-int ll_setattr(struct dentry *de, struct iattr *attr);
-int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags);
-int ll_update_inode(struct inode *inode, struct lustre_md *md);
-int ll_read_inode2(struct inode *inode, void *opaque);
-void ll_delete_inode(struct inode *inode);
-int ll_iocontrol(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
-int ll_flush_ctx(struct inode *inode);
-void ll_umount_begin(struct super_block *sb);
-int ll_remount_fs(struct super_block *sb, int *flags, char *data);
-int ll_show_options(struct seq_file *seq, struct dentry *dentry);
-void ll_dirty_page_discard_warn(struct page *page, int ioret);
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *sb, struct lookup_intent *it);
-int ll_obd_statfs(struct inode *inode, void __user *arg);
-int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
-int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
-int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize);
-int ll_process_config(struct lustre_cfg *lcfg);
-
-enum {
- LUSTRE_OPC_MKDIR = 0,
- LUSTRE_OPC_SYMLINK = 1,
- LUSTRE_OPC_MKNOD = 2,
- LUSTRE_OPC_CREATE = 3,
- LUSTRE_OPC_ANY = 5,
-};
-
-struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
- struct inode *i1, struct inode *i2,
- const char *name, size_t namelen,
- u32 mode, __u32 opc, void *data);
-void ll_finish_md_op_data(struct md_op_data *op_data);
-int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
-void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
-ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
- struct lov_user_md **kbuf);
-
-/* Compute expected user md size when passing in a md from user space */
-static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
-{
- switch (lum->lmm_magic) {
- case LOV_USER_MAGIC_V1:
- return sizeof(struct lov_user_md_v1);
- case LOV_USER_MAGIC_V3:
- return sizeof(struct lov_user_md_v3);
- case LOV_USER_MAGIC_SPECIFIC:
- if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
- return -EINVAL;
-
- return lov_user_md_size(lum->lmm_stripe_count,
- LOV_USER_MAGIC_SPECIFIC);
- }
- return -EINVAL;
-}
-
-/* llite/llite_nfs.c */
-extern const struct export_operations lustre_export_operations;
-__u32 get_uuid2int(const char *name, int len);
-void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid);
-struct inode *search_inode_for_lustre(struct super_block *sb,
- const struct lu_fid *fid);
-int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid);
-
-/* llite/symlink.c */
-extern const struct inode_operations ll_fast_symlink_inode_operations;
-
-/**
- * IO arguments for various VFS I/O interfaces.
- */
-struct vvp_io_args {
- /** normal/splice */
- union {
- struct {
- struct kiocb *via_iocb;
- struct iov_iter *via_iter;
- } normal;
- } u;
-};
-
-struct ll_cl_context {
- struct list_head lcc_list;
- void *lcc_cookie;
- const struct lu_env *lcc_env;
- struct cl_io *lcc_io;
- struct cl_page *lcc_page;
-};
-
-struct ll_thread_info {
- struct vvp_io_args lti_args;
- struct ra_io_arg lti_ria;
- struct ll_cl_context lti_io_ctx;
-};
-
-extern struct lu_context_key ll_thread_key;
-static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
-{
- struct ll_thread_info *lti;
-
- lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
- LASSERT(lti);
- return lti;
-}
-
-static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
-{
- return &ll_env_info(env)->lti_args;
-}
-
-/* llite/llite_mmap.c */
-
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
-int ll_file_mmap(struct file *file, struct vm_area_struct *vma);
-void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma,
- unsigned long addr, size_t count);
-struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
- size_t count);
-
-static inline void ll_invalidate_page(struct page *vmpage)
-{
- struct address_space *mapping = vmpage->mapping;
- loff_t offset = vmpage->index << PAGE_SHIFT;
-
- LASSERT(PageLocked(vmpage));
- if (!mapping)
- return;
-
- /*
- * truncate_complete_page() calls
- * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
- */
- ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
- truncate_complete_page(mapping, vmpage);
-}
-
-#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-
-/* don't need an addref as the sb_info should be holding one */
-static inline struct obd_export *ll_s2dtexp(struct super_block *sb)
-{
- return ll_s2sbi(sb)->ll_dt_exp;
-}
-
-/* don't need an addref as the sb_info should be holding one */
-static inline struct obd_export *ll_s2mdexp(struct super_block *sb)
-{
- return ll_s2sbi(sb)->ll_md_exp;
-}
-
-static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
-{
- struct obd_device *obd = sbi->ll_md_exp->exp_obd;
-
- if (!obd)
- LBUG();
- return &obd->u.cli;
-}
-
-/* FIXME: replace the name of this with LL_SB to conform to kernel stuff */
-static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
-{
- return ll_s2sbi(inode->i_sb);
-}
-
-static inline struct obd_export *ll_i2dtexp(struct inode *inode)
-{
- return ll_s2dtexp(inode->i_sb);
-}
-
-static inline struct obd_export *ll_i2mdexp(struct inode *inode)
-{
- return ll_s2mdexp(inode->i_sb);
-}
-
-static inline struct lu_fid *ll_inode2fid(struct inode *inode)
-{
- struct lu_fid *fid;
-
- LASSERT(inode);
- fid = &ll_i2info(inode)->lli_fid;
-
- return fid;
-}
-
-static inline loff_t ll_file_maxbytes(struct inode *inode)
-{
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
-
- if (!obj)
- return MAX_LFS_FILESIZE;
-
- return min_t(loff_t, cl_object_maxbytes(obj), MAX_LFS_FILESIZE);
-}
-
-/* llite/xattr.c */
-extern const struct xattr_handler *ll_xattr_handlers[];
-
-#define XATTR_USER_T 1
-#define XATTR_TRUSTED_T 2
-#define XATTR_SECURITY_T 3
-#define XATTR_ACL_ACCESS_T 4
-#define XATTR_ACL_DEFAULT_T 5
-#define XATTR_LUSTRE_T 6
-#define XATTR_OTHER_T 7
-
-ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ll_xattr_list(struct inode *inode, const char *name, int type,
- void *buffer, size_t size, __u64 valid);
-const struct xattr_handler *get_xattr_type(const char *name);
-
-/**
- * Common IO arguments for various VFS I/O interfaces.
- */
-int cl_sb_init(struct super_block *sb);
-int cl_sb_fini(struct super_block *sb);
-
-enum ras_update_flags {
- LL_RAS_HIT = 0x1,
- LL_RAS_MMAP = 0x2
-};
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
-
-/* statahead.c */
-#define LL_SA_RPC_MIN 2
-#define LL_SA_RPC_DEF 32
-#define LL_SA_RPC_MAX 8192
-
-#define LL_SA_CACHE_BIT 5
-#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT)
-#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1)
-
-/* per inode struct, for dir only */
-struct ll_statahead_info {
- struct dentry *sai_dentry;
- atomic_t sai_refcount; /* when access this struct, hold
- * refcount
- */
- unsigned int sai_max; /* max ahead of lookup */
- __u64 sai_sent; /* stat requests sent count */
- __u64 sai_replied; /* stat requests which received
- * reply
- */
- __u64 sai_index; /* index of statahead entry */
- __u64 sai_index_wait; /* index of entry which is the
- * caller is waiting for
- */
- __u64 sai_hit; /* hit count */
- __u64 sai_miss; /* miss count:
- * for "ls -al" case, it includes
- * hidden dentry miss;
- * for "ls -l" case, it does not
- * include hidden dentry miss.
- * "sai_miss_hidden" is used for
- * the later case.
- */
- unsigned int sai_consecutive_miss; /* consecutive miss */
- unsigned int sai_miss_hidden;/* "ls -al", but first dentry
- * is not a hidden one
- */
- unsigned int sai_skip_hidden;/* skipped hidden dentry count */
- unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
- * hidden entries
- */
- sai_agl_valid:1,/* AGL is valid for the dir */
- sai_in_readpage:1;/* statahead in readdir() */
- wait_queue_head_t sai_waitq; /* stat-ahead wait queue */
- struct task_struct *sai_task; /* stat-ahead thread */
- struct task_struct *sai_agl_task; /* AGL thread */
- struct list_head sai_interim_entries; /* entries which got async
- * stat reply, but not
- * instantiated
- */
- struct list_head sai_entries; /* completed entries */
- struct list_head sai_agls; /* AGLs to be sent */
- struct list_head sai_cache[LL_SA_CACHE_SIZE];
- spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
- atomic_t sai_cache_count; /* entry count in cache */
-};
-
-int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug);
-void ll_authorize_statahead(struct inode *dir, void *key);
-void ll_deauthorize_statahead(struct inode *dir, void *key);
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 1);
-}
-
-static inline int ll_glimpse_size(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- down_read(&lli->lli_glimpse_sem);
- rc = cl_glimpse_size(inode);
- lli->lli_glimpse_time = cfs_time_current();
- up_read(&lli->lli_glimpse_sem);
- return rc;
-}
-
-/*
- * dentry may statahead when statahead is enabled and current process has opened
- * parent directory, and this dentry hasn't accessed statahead cache before
- */
-static inline bool
-dentry_may_statahead(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli;
- struct ll_dentry_data *ldd;
-
- if (ll_i2sbi(dir)->ll_sa_max == 0)
- return false;
-
- lli = ll_i2info(dir);
-
- /*
- * statahead is not allowed for this dir, there may be three causes:
- * 1. dir is not opened.
- * 2. statahead hit ratio is too low.
- * 3. previous stat started statahead thread failed.
- */
- if (!lli->lli_sa_enabled)
- return false;
-
- /* not the same process, don't statahead */
- if (lli->lli_opendir_pid != current_pid())
- return false;
-
- /*
- * When stating a dentry, kernel may trigger 'revalidate' or 'lookup'
- * multiple times, eg. for 'getattr', 'getxattr' and etc.
- * For patchless client, lookup intent is not accurate, which may
- * misguide statahead. For example:
- * The 'revalidate' call for 'getattr' and 'getxattr' of a dentry will
- * have the same intent -- IT_GETATTR, while one dentry should access
- * statahead cache once, otherwise statahead windows is messed up.
- * The solution is as following:
- * Assign 'lld_sa_generation' with 'lli_sa_generation' when a dentry
- * IT_GETATTR for the first time, and subsequent IT_GETATTR will
- * bypass interacting with statahead cache by checking
- * 'lld_sa_generation == lli->lli_sa_generation'.
- */
- ldd = ll_d2d(dentry);
- if (ldd->lld_sa_generation == lli->lli_sa_generation)
- return false;
-
- return true;
-}
-
-/* llite ioctl register support routine */
-enum llioc_iter {
- LLIOC_CONT = 0,
- LLIOC_STOP
-};
-
-#define LLIOC_MAX_CMD 256
-
-/*
- * Rules to write a callback function:
- *
- * Parameters:
- * @magic: Dynamic ioctl call routine will feed this value with the pointer
- * returned to ll_iocontrol_register. Callback functions should use this
- * data to check the potential collasion of ioctl cmd. If collasion is
- * found, callback function should return LLIOC_CONT.
- * @rcp: The result of ioctl command.
- *
- * Return values:
- * If @magic matches the pointer returned by ll_iocontrol_data, the
- * callback should return LLIOC_STOP; return LLIOC_STOP otherwise.
- */
-typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
- struct file *file, unsigned int cmd, unsigned long arg,
- void *magic, int *rcp);
-
-/* export functions */
-/* Register ioctl block dynamatically for a regular file.
- *
- * @cmd: the array of ioctl command set
- * @count: number of commands in the @cmd
- * @cb: callback function, it will be called if an ioctl command is found to
- * belong to the command list @cmd.
- *
- * Return value:
- * A magic pointer will be returned if success;
- * otherwise, NULL will be returned.
- */
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
-void ll_iocontrol_unregister(void *magic);
-
-int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout);
-
-/** direct write pages */
-struct ll_dio_pages {
- /** page array to be written. we don't support
- * partial pages except the last one.
- */
- struct page **ldp_pages;
- /* offset of each page */
- loff_t *ldp_offsets;
- /** if ldp_offsets is NULL, it means a sequential
- * pages to be written, then this is the file offset
- * of the first page.
- */
- loff_t ldp_start_offset;
- /** how many bytes are to be written. */
- size_t ldp_size;
- /** # of pages in the array. */
- int ldp_nr;
-};
-
-ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct ll_dio_pages *pv);
-
-static inline int ll_file_nolock(const struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct inode *inode = file_inode(file);
-
- return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
- (ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
-}
-
-static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
- struct lookup_intent *it, __u64 *bits)
-{
- if (!it->it_lock_set) {
- struct lustre_handle handle;
-
- /* If this inode is a remote object, it will get two
- * separate locks in different namespaces, Master MDT,
- * where the name entry is, will grant LOOKUP lock,
- * remote MDT, where the object is, will grant
- * UPDATE|PERM lock. The inode will be attached to both
- * LOOKUP and PERM locks, so revoking either locks will
- * case the dcache being cleared
- */
- if (it->it_remote_lock_mode) {
- handle.cookie = it->it_remote_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "%p for remote lock %#llx\n",
- PFID(ll_inode2fid(inode)), inode,
- handle.cookie);
- md_set_lock_data(exp, &handle, inode, NULL);
- }
-
- handle.cookie = it->it_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "setting l_data to inode " DFID "%p for lock %#llx\n",
- PFID(ll_inode2fid(inode)), inode, handle.cookie);
-
- md_set_lock_data(exp, &handle, inode, &it->it_lock_bits);
- it->it_lock_set = 1;
- }
-
- if (bits)
- *bits = it->it_lock_bits;
-}
-
-static inline int d_lustre_invalid(const struct dentry *dentry)
-{
- return ll_d2d(dentry)->lld_invalid;
-}
-
-/*
- * Mark dentry INVALID, if dentry refcount is zero (this is normally case for
- * ll_md_blocking_ast), unhash this dentry, and let dcache to reclaim it later;
- * else dput() of the last refcount will unhash this dentry and kill it.
- */
-static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
-{
- CDEBUG(D_DENTRY,
- "invalidate dentry %pd (%p) parent %p inode %p refc %d\n",
- dentry, dentry,
- dentry->d_parent, d_inode(dentry), d_count(dentry));
-
- spin_lock_nested(&dentry->d_lock,
- nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
- ll_d2d(dentry)->lld_invalid = 1;
- if (d_count(dentry) == 0)
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void d_lustre_revalidate(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- LASSERT(ll_d2d(dentry));
- ll_d2d(dentry)->lld_invalid = 0;
- spin_unlock(&dentry->d_lock);
-}
-
-int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
-int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
-
-int ll_xattr_init(void);
-void ll_xattr_fini(void);
-
-int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, enum cl_req_type crt);
-
-int ll_getparent(struct file *file, struct getparent __user *arg);
-
-/* lcommon_cl.c */
-int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
- unsigned int attr_flags);
-
-extern struct lu_env *cl_inode_fini_env;
-extern u16 cl_inode_fini_refcheck;
-
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
deleted file mode 100644
index e7500c53fafc..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ /dev/null
@@ -1,2666 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/llite_lib.c
- *
- * Lustre Light Super operations
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/statfs.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_ha.h>
-#include <lustre_dlm.h>
-#include <lprocfs_status.h>
-#include <lustre_disk.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_log.h>
-#include <cl_object.h>
-#include <obd_cksum.h>
-#include "llite_internal.h"
-
-struct kmem_cache *ll_file_data_slab;
-struct dentry *llite_root;
-struct kset *llite_kset;
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
-{
- struct ll_sb_info *sbi = NULL;
- unsigned long pages;
- unsigned long lru_page_max;
- struct sysinfo si;
- class_uuid_t uuid;
- int i;
-
- sbi = kzalloc(sizeof(*sbi), GFP_NOFS);
- if (!sbi)
- return NULL;
-
- spin_lock_init(&sbi->ll_lock);
- mutex_init(&sbi->ll_lco.lco_lock);
- spin_lock_init(&sbi->ll_pp_extent_lock);
- spin_lock_init(&sbi->ll_process_lock);
- sbi->ll_rw_stats_on = 0;
-
- si_meminfo(&si);
- pages = si.totalram - si.totalhigh;
- lru_page_max = pages / 2;
-
- sbi->ll_cache = cl_cache_init(lru_page_max);
- if (!sbi->ll_cache) {
- kfree(sbi);
- return NULL;
- }
-
- sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
- SBI_DEFAULT_READAHEAD_MAX);
- sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
- sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
- SBI_DEFAULT_READAHEAD_WHOLE_MAX;
-
- ll_generate_random_uuid(uuid);
- class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
- CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
-
- sbi->ll_flags |= LL_SBI_VERBOSE;
- sbi->ll_flags |= LL_SBI_CHECKSUM;
-
- sbi->ll_flags |= LL_SBI_LRU_RESIZE;
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
-
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_r_hist.oh_lock);
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_w_hist.oh_lock);
- }
-
- /* metadata statahead is enabled by default */
- sbi->ll_sa_max = LL_SA_RPC_DEF;
- atomic_set(&sbi->ll_sa_total, 0);
- atomic_set(&sbi->ll_sa_wrong, 0);
- atomic_set(&sbi->ll_sa_running, 0);
- atomic_set(&sbi->ll_agl_total, 0);
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
-
- /* root squash */
- sbi->ll_squash.rsi_uid = 0;
- sbi->ll_squash.rsi_gid = 0;
- INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
- init_rwsem(&sbi->ll_squash.rsi_sem);
-
- sbi->ll_sb = sb;
-
- return sbi;
-}
-
-static void ll_free_sbi(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- if (sbi->ll_cache) {
- if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
- cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
- cl_cache_decref(sbi->ll_cache);
- sbi->ll_cache = NULL;
- }
-
- kfree(sbi);
-}
-
-static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
-{
- struct inode *root = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_statfs *osfs = NULL;
- struct ptlrpc_request *request = NULL;
- struct obd_connect_data *data = NULL;
- struct obd_uuid *uuid;
- struct md_op_data *op_data;
- struct lustre_md lmd;
- u64 valid;
- int size, err, checksum;
-
- obd = class_name2obd(md);
- if (!obd) {
- CERROR("MD %s: not setup or attached\n", md);
- return -EINVAL;
- }
-
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data)
- return -ENOMEM;
-
- osfs = kzalloc(sizeof(*osfs), GFP_NOFS);
- if (!osfs) {
- kfree(data);
- return -ENOMEM;
- }
-
- /* indicate the features supported by this client */
- data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
- OBD_CONNECT_ATTRFID |
- OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 |
- OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH |
- OBD_CONNECT_EINPROGRESS |
- OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK |
- OBD_CONNECT_PINGLESS |
- OBD_CONNECT_MAX_EASIZE |
- OBD_CONNECT_FLOCK_DEAD |
- OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
- OBD_CONNECT_OPEN_BY_FID |
- OBD_CONNECT_DIR_STRIPE |
- OBD_CONNECT_BULK_MBITS;
-
- if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
- data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-#ifdef CONFIG_FS_POSIX_ACL
- data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK;
-#endif
-
- if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
- /* flag mdc connection as lightweight, only used for test
- * purpose, use with care
- */
- data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
-
- data->ocd_ibits_known = MDS_INODELOCK_FULL;
- data->ocd_version = LUSTRE_VERSION_CODE;
-
- if (sb_rdonly(sb))
- data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- sbi->ll_fop = &ll_file_operations_flock;
- else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- sbi->ll_fop = &ll_file_operations;
- else
- sbi->ll_fop = &ll_file_operations_noflock;
-
- /* always ping even if server suppress_pings */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
-
- data->ocd_brw_size = MD_MAX_BRW_SIZE;
-
- err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid,
- data, NULL);
- if (err == -EBUSY) {
- LCONSOLE_ERROR_MSG(0x14f,
- "An MDT (md %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
- md);
- goto out;
- }
-
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", md, err);
- goto out;
- }
-
- sbi->ll_md_exp->exp_connect_data = *data;
-
- err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
- LUSTRE_SEQ_METADATA);
- if (err) {
- CERROR("%s: Can't init metadata layer FID infrastructure, rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_md;
- }
-
- /* For mount, we only need fs info from MDT0, and also in DNE, it
- * can make sure the client can be mounted as long as MDT0 is
- * available
- */
- err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_FOR_MDT0);
- if (err)
- goto out_md_fid;
-
- /* This needs to be after statfs to ensure connect has finished.
- * Note that "data" does NOT contain the valid connect reply.
- * If connecting to a 1.8 server there will be no LMV device, so
- * we can access the MDC export directly and exp_connect_flags will
- * be non-zero, but if accessing an upgraded 2.1 server it will
- * have the correct flags filled in.
- * XXX: fill in the LMV exp_connect_flags from MDC(s).
- */
- valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
- if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
- valid != CLIENT_CONNECT_MDT_REQD) {
- char *buf;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf) {
- err = -ENOMEM;
- goto out_md_fid;
- }
- obd_connect_flags2str(buf, PAGE_SIZE,
- valid ^ CLIENT_CONNECT_MDT_REQD, ",");
- LCONSOLE_ERROR_MSG(0x170,
- "Server %s does not support feature(s) needed for correct operation of this client (%s). Please upgrade server or downgrade client.\n",
- sbi->ll_md_exp->exp_obd->obd_name, buf);
- kfree(buf);
- err = -EPROTO;
- goto out_md_fid;
- }
-
- size = sizeof(*data);
- err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
- KEY_CONN_DATA, &size, data);
- if (err) {
- CERROR("%s: Get connect data failed: rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_md_fid;
- }
-
- LASSERT(osfs->os_bsize);
- sb->s_blocksize = osfs->os_bsize;
- sb->s_blocksize_bits = log2(osfs->os_bsize);
- sb->s_magic = LL_SUPER_MAGIC;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sbi->ll_namelen = osfs->os_namelen;
- sbi->ll_mnt.mnt = current->fs->root.mnt;
-
- if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
- !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
- LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
-
- if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
- sb->s_flags |= SB_POSIXACL;
- sbi->ll_flags |= LL_SBI_ACL;
- } else {
- LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
- sb->s_flags &= ~SB_POSIXACL;
- sbi->ll_flags &= ~LL_SBI_ACL;
- }
-
- if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
- sbi->ll_flags |= LL_SBI_64BIT_HASH;
-
- if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
- sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT;
- else
- sbi->ll_md_brw_pages = 1;
-
- if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
- sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
-
- if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
- if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
- LCONSOLE_INFO(
- "%s: disabling xattr cache due to unknown maximum xattr size.\n",
- dt);
- } else {
- sbi->ll_flags |= LL_SBI_XATTR_CACHE;
- sbi->ll_xattr_cache_enabled = 1;
- }
- }
-
- obd = class_name2obd(dt);
- if (!obd) {
- CERROR("DT %s: not setup or attached\n", dt);
- err = -ENODEV;
- goto out_md_fid;
- }
-
- data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
- OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
- OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
- OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
- OBD_CONNECT_EINPROGRESS |
- OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK |
- OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
- OBD_CONNECT_BULK_MBITS;
-
- if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
- /* OBD_CONNECT_CKSUM should always be set, even if checksums are
- * disabled by default, because it can still be enabled on the
- * fly via /sys. As a consequence, we still need to come to an
- * agreement on the supported algorithms at connect time
- */
- data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
- data->ocd_cksum_types = OBD_CKSUM_ADLER;
- else
- data->ocd_cksum_types = cksum_types_supported_client();
- }
-
- data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-
- /* always ping even if server suppress_pings */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
-
- CDEBUG(D_RPCTRACE,
- "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
- data->ocd_connect_flags,
- data->ocd_version, data->ocd_grant);
-
- obd->obd_upcall.onu_owner = &sbi->ll_lco;
- obd->obd_upcall.onu_upcall = cl_ocd_update;
-
- data->ocd_brw_size = DT_MAX_BRW_SIZE;
-
- err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data,
- NULL);
- if (err == -EBUSY) {
- LCONSOLE_ERROR_MSG(0x150,
- "An OST (dt %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
- dt);
- goto out_md;
- } else if (err) {
- CERROR("%s: Cannot connect to %s: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
- goto out_md;
- }
-
- sbi->ll_dt_exp->exp_connect_data = *data;
-
- err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
- LUSTRE_SEQ_METADATA);
- if (err) {
- CERROR("%s: Can't init data layer FID infrastructure, rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_dt;
- }
-
- mutex_lock(&sbi->ll_lco.lco_lock);
- sbi->ll_lco.lco_flags = data->ocd_connect_flags;
- sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
- sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
- mutex_unlock(&sbi->ll_lco.lco_lock);
-
- fid_zero(&sbi->ll_root_fid);
- err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid);
- if (err) {
- CERROR("cannot mds_connect: rc = %d\n", err);
- goto out_lock_cn_cb;
- }
- if (!fid_is_sane(&sbi->ll_root_fid)) {
- CERROR("%s: Invalid root fid " DFID " during mount\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(&sbi->ll_root_fid));
- err = -EINVAL;
- goto out_lock_cn_cb;
- }
- CDEBUG(D_SUPER, "rootfid " DFID "\n", PFID(&sbi->ll_root_fid));
-
- sb->s_op = &lustre_super_operations;
- sb->s_xattr = ll_xattr_handlers;
-#if THREAD_SIZE >= 8192 /*b=17630*/
- sb->s_export_op = &lustre_export_operations;
-#endif
-
- /* make root inode
- * XXX: move this to after cbd setup?
- */
- valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
- if (sbi->ll_flags & LL_SBI_ACL)
- valid |= OBD_MD_FLACL;
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data) {
- err = -ENOMEM;
- goto out_lock_cn_cb;
- }
-
- op_data->op_fid1 = sbi->ll_root_fid;
- op_data->op_mode = 0;
- op_data->op_valid = valid;
-
- err = md_getattr(sbi->ll_md_exp, op_data, &request);
- kfree(op_data);
- if (err) {
- CERROR("%s: md_getattr failed for root: rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_lock_cn_cb;
- }
-
- err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &lmd);
- if (err) {
- CERROR("failed to understand root inode md: rc = %d\n", err);
- ptlrpc_req_finished(request);
- goto out_lock_cn_cb;
- }
-
- LASSERT(fid_is_sane(&sbi->ll_root_fid));
- root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &lmd);
- md_free_lustre_md(sbi->ll_md_exp, &lmd);
- ptlrpc_req_finished(request);
-
- if (IS_ERR(root)) {
-#ifdef CONFIG_FS_POSIX_ACL
- if (lmd.posix_acl) {
- posix_acl_release(lmd.posix_acl);
- lmd.posix_acl = NULL;
- }
-#endif
- err = -EBADF;
- CERROR("lustre_lite: bad iget4 for root\n");
- goto out_root;
- }
-
- checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
- err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
- KEY_CHECKSUM, sizeof(checksum), &checksum,
- NULL);
- if (err) {
- CERROR("%s: Set checksum failed: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_root;
- }
- cl_sb_init(sb);
-
- err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
- KEY_CACHE_SET, sizeof(*sbi->ll_cache),
- sbi->ll_cache, NULL);
- if (err) {
- CERROR("%s: Set cache_set failed: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_root;
- }
-
- sb->s_root = d_make_root(root);
- if (!sb->s_root) {
- CERROR("%s: can't make root dentry\n",
- ll_get_fsname(sb, NULL, 0));
- err = -ENOMEM;
- goto out_lock_cn_cb;
- }
-
- sbi->ll_sdev_orig = sb->s_dev;
-
- /* We set sb->s_dev equal on all lustre clients in order to support
- * NFS export clustering. NFSD requires that the FSID be the same
- * on all clients.
- */
- /* s_dev is also used in lt_compare() to compare two fs, but that is
- * only a node-local comparison.
- */
- uuid = obd_get_uuid(sbi->ll_md_exp);
- if (uuid) {
- sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
- get_uuid2fsid(uuid->uuid, strlen(uuid->uuid), &sbi->ll_fsid);
- }
-
- kfree(data);
- kfree(osfs);
-
- if (llite_root) {
- err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
- if (err < 0) {
- CERROR("%s: could not register mount in debugfs: "
- "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
- err = 0;
- }
- }
-
- return err;
-out_root:
- iput(root);
-out_lock_cn_cb:
- obd_fid_fini(sbi->ll_dt_exp->exp_obd);
-out_dt:
- obd_disconnect(sbi->ll_dt_exp);
- sbi->ll_dt_exp = NULL;
-out_md_fid:
- obd_fid_fini(sbi->ll_md_exp->exp_obd);
-out_md:
- obd_disconnect(sbi->ll_md_exp);
- sbi->ll_md_exp = NULL;
-out:
- kfree(data);
- kfree(osfs);
- return err;
-}
-
-int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-{
- int size, rc;
-
- size = sizeof(*lmmsize);
- rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
- KEY_MAX_EASIZE, &size, lmmsize);
- if (rc) {
- CERROR("%s: cannot get max LOV EA size: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, rc);
- return rc;
- }
-
- size = sizeof(int);
- rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
- KEY_MAX_EASIZE, &size, lmmsize);
- if (rc)
- CERROR("Get max mdsize error rc %d\n", rc);
-
- return rc;
-}
-
-/**
- * Get the value of the default_easize parameter.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] sbi superblock info for this filesystem
- * \param[out] lmmsize pointer to storage location for value
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-{
- int size, rc;
-
- size = sizeof(int);
- rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE, &size, lmmsize);
- if (rc)
- CERROR("Get default mdsize error rc %d\n", rc);
-
- return rc;
-}
-
-/**
- * Set the default_easize parameter to the given value.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] sbi superblock info for this filesystem
- * \param[in] lmmsize the size to set
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
-{
- if (lmmsize < sizeof(struct lov_mds_md) ||
- lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
- return -EINVAL;
-
- return obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE,
- sizeof(int), &lmmsize, NULL);
-}
-
-static void client_common_put_super(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- cl_sb_fini(sb);
-
- obd_fid_fini(sbi->ll_dt_exp->exp_obd);
- obd_disconnect(sbi->ll_dt_exp);
- sbi->ll_dt_exp = NULL;
-
- ldebugfs_unregister_mountpoint(sbi);
-
- obd_fid_fini(sbi->ll_md_exp->exp_obd);
- obd_disconnect(sbi->ll_md_exp);
- sbi->ll_md_exp = NULL;
-}
-
-void ll_kill_super(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
-
- /* not init sb ?*/
- if (!(sb->s_flags & SB_ACTIVE))
- return;
-
- sbi = ll_s2sbi(sb);
- /* we need to restore s_dev from changed for clustered NFS before
- * put_super because new kernels have cached s_dev and change sb->s_dev
- * in put_super not affected real removing devices
- */
- if (sbi) {
- sb->s_dev = sbi->ll_sdev_orig;
- sbi->ll_umounting = 1;
-
- /* wait running statahead threads to quit */
- while (atomic_read(&sbi->ll_sa_running) > 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
- }
- }
-}
-
-static inline int ll_set_opt(const char *opt, char *data, int fl)
-{
- if (strncmp(opt, data, strlen(opt)) != 0)
- return 0;
- else
- return fl;
-}
-
-/* non-client-specific mount options are parsed in lmd_parse */
-static int ll_options(char *options, int *flags)
-{
- int tmp;
- char *s1 = options, *s2;
-
- if (!options)
- return 0;
-
- CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
-
- while (*s1) {
- CDEBUG(D_SUPER, "next opt=%s\n", s1);
- tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("noflock", s1,
- LL_SBI_FLOCK | LL_SBI_LOCALFLOCK);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("context", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("fscontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("defcontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("rootcontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
-
- tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
- s1);
- return -EINVAL;
-
-next:
- /* Find next opt */
- s2 = strchr(s1, ',');
- if (!s2)
- break;
- s1 = s2 + 1;
- }
- return 0;
-}
-
-void ll_lli_init(struct ll_inode_info *lli)
-{
- lli->lli_inode_magic = LLI_INODE_MAGIC;
- lli->lli_flags = 0;
- spin_lock_init(&lli->lli_lock);
- lli->lli_posix_acl = NULL;
- /* Do not set lli_fid, it has been initialized already. */
- fid_zero(&lli->lli_pfid);
- lli->lli_mds_read_och = NULL;
- lli->lli_mds_write_och = NULL;
- lli->lli_mds_exec_och = NULL;
- lli->lli_open_fd_read_count = 0;
- lli->lli_open_fd_write_count = 0;
- lli->lli_open_fd_exec_count = 0;
- mutex_init(&lli->lli_och_mutex);
- spin_lock_init(&lli->lli_agl_lock);
- spin_lock_init(&lli->lli_layout_lock);
- ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
- lli->lli_clob = NULL;
-
- init_rwsem(&lli->lli_xattrs_list_rwsem);
- mutex_init(&lli->lli_xattrs_enq_lock);
-
- LASSERT(lli->lli_vfs_inode.i_mode != 0);
- if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
- mutex_init(&lli->lli_readdir_mutex);
- lli->lli_opendir_key = NULL;
- lli->lli_sai = NULL;
- spin_lock_init(&lli->lli_sa_lock);
- lli->lli_opendir_pid = 0;
- lli->lli_sa_enabled = 0;
- lli->lli_def_stripe_offset = -1;
- } else {
- mutex_init(&lli->lli_size_mutex);
- lli->lli_symlink_name = NULL;
- init_rwsem(&lli->lli_trunc_sem);
- range_lock_tree_init(&lli->lli_write_tree);
- init_rwsem(&lli->lli_glimpse_sem);
- lli->lli_glimpse_time = 0;
- INIT_LIST_HEAD(&lli->lli_agl_list);
- lli->lli_agl_index = 0;
- lli->lli_async_rc = 0;
- }
- mutex_init(&lli->lli_layout_mutex);
-}
-
-int ll_fill_super(struct super_block *sb)
-{
- struct lustre_profile *lprof = NULL;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi;
- char *dt = NULL, *md = NULL;
- char *profilenm = get_profile_name(sb);
- struct config_llog_instance *cfg;
- int err;
- static atomic_t ll_bdi_num = ATOMIC_INIT(0);
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-
- err = ptlrpc_inc_ref();
- if (err)
- return err;
-
- cfg = kzalloc(sizeof(*cfg), GFP_NOFS);
- if (!cfg) {
- err = -ENOMEM;
- goto out_put;
- }
-
- try_module_get(THIS_MODULE);
-
- /* client additional sb info */
- sbi = ll_init_sbi(sb);
- lsi->lsi_llsbi = sbi;
- if (!sbi) {
- module_put(THIS_MODULE);
- kfree(cfg);
- err = -ENOMEM;
- goto out_put;
- }
-
- err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
- if (err)
- goto out_free;
-
- err = super_setup_bdi_name(sb, "lustre-%d",
- atomic_inc_return(&ll_bdi_num));
- if (err)
- goto out_free;
-
- /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
- sb->s_d_op = &ll_d_ops;
-
- /* Generate a string unique to this super, in case some joker tries
- * to mount the same fs at two mount points.
- * Use the address of the super itself.
- */
- cfg->cfg_instance = sb;
- cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
- cfg->cfg_callback = class_config_llog_handler;
- /* set up client obds */
- err = lustre_process_log(sb, profilenm, cfg);
- if (err < 0)
- goto out_free;
-
- /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
- lprof = class_get_profile(profilenm);
- if (!lprof) {
- LCONSOLE_ERROR_MSG(0x156,
- "The client profile '%s' could not be read from the MGS. Does that filesystem exist?\n",
- profilenm);
- err = -EINVAL;
- goto out_free;
- }
- CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
- lprof->lp_md, lprof->lp_dt);
-
- dt = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_dt, cfg->cfg_instance);
- if (!dt) {
- err = -ENOMEM;
- goto out_free;
- }
-
- md = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_md, cfg->cfg_instance);
- if (!md) {
- err = -ENOMEM;
- goto out_free;
- }
-
- /* connections, registrations, sb setup */
- err = client_common_fill_super(sb, md, dt);
- if (!err)
- sbi->ll_client_common_fill_super_succeeded = 1;
-
-out_free:
- kfree(md);
- kfree(dt);
- if (lprof)
- class_put_profile(lprof);
- if (err)
- ll_put_super(sb);
- else if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Mounted %s\n", profilenm);
-
- kfree(cfg);
-out_put:
- if (err)
- ptlrpc_dec_ref();
- return err;
-} /* ll_fill_super */
-
-void ll_put_super(struct super_block *sb)
-{
- struct config_llog_instance cfg, params_cfg;
- struct obd_device *obd;
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char *profilenm = get_profile_name(sb);
- int next, force = 1, rc = 0;
- long ccc_count;
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
-
- cfg.cfg_instance = sb;
- lustre_end_log(sb, profilenm, &cfg);
-
- params_cfg.cfg_instance = sb;
- lustre_end_log(sb, PARAMS_FILENAME, &params_cfg);
-
- if (sbi->ll_md_exp) {
- obd = class_exp2obd(sbi->ll_md_exp);
- if (obd)
- force = obd->obd_force;
- }
-
- /* Wait for unstable pages to be committed to stable storage */
- if (!force)
- rc = l_wait_event_abortable(sbi->ll_cache->ccc_unstable_waitq,
- !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr));
-
- ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
- if (!force && rc != -ERESTARTSYS)
- LASSERTF(!ccc_count, "count: %li\n", ccc_count);
-
- /* We need to set force before the lov_disconnect in
- * lustre_common_put_super, since l_d cleans up osc's as well.
- */
- if (force) {
- next = 0;
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
- &next)) != NULL) {
- obd->obd_force = force;
- }
- }
-
- if (sbi->ll_client_common_fill_super_succeeded) {
- /* Only if client_common_fill_super succeeded */
- client_common_put_super(sb);
- }
-
- next = 0;
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)))
- class_manual_cleanup(obd);
-
- if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
-
- if (profilenm)
- class_del_profile(profilenm);
-
- ll_free_sbi(sb);
- lsi->lsi_llsbi = NULL;
-
- lustre_common_put_super(sb);
-
- cl_env_cache_purge(~0);
-
- module_put(THIS_MODULE);
-
- ptlrpc_dec_ref();
-} /* client_put_super */
-
-struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
-{
- struct inode *inode = NULL;
-
- /* NOTE: we depend on atomic igrab() -bzzz */
- lock_res_and_lock(lock);
- if (lock->l_resource->lr_lvb_inode) {
- struct ll_inode_info *lli;
-
- lli = ll_i2info(lock->l_resource->lr_lvb_inode);
- if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
- inode = igrab(lock->l_resource->lr_lvb_inode);
- } else {
- inode = lock->l_resource->lr_lvb_inode;
- LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO :
- D_WARNING, lock,
- "lr_lvb_inode %p is bogus: magic %08x",
- lock->l_resource->lr_lvb_inode,
- lli->lli_inode_magic);
- inode = NULL;
- }
- }
- unlock_res_and_lock(lock);
- return inode;
-}
-
-void ll_dir_clear_lsm_md(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- LASSERT(S_ISDIR(inode->i_mode));
-
- if (lli->lli_lsm_md) {
- lmv_free_memmd(lli->lli_lsm_md);
- lli->lli_lsm_md = NULL;
- }
-}
-
-static struct inode *ll_iget_anon_dir(struct super_block *sb,
- const struct lu_fid *fid,
- struct lustre_md *md)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct mdt_body *body = md->body;
- struct inode *inode;
- ino_t ino;
-
- ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
- inode = iget_locked(sb, ino);
- if (!inode) {
- CERROR("%s: failed get simple inode " DFID ": rc = -ENOENT\n",
- ll_get_fsname(sb, NULL, 0), PFID(fid));
- return ERR_PTR(-ENOENT);
- }
-
- if (inode->i_state & I_NEW) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lmv_stripe_md *lsm = md->lmv;
-
- inode->i_mode = (inode->i_mode & ~S_IFMT) |
- (body->mbo_mode & S_IFMT);
- LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode " DFID "\n",
- PFID(fid));
-
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- inode->i_rdev = 0;
-
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- lli->lli_fid = *fid;
- ll_lli_init(lli);
-
- LASSERT(lsm);
- /* master object FID */
- lli->lli_pfid = body->mbo_fid1;
- CDEBUG(D_INODE, "lli %p slave " DFID " master " DFID "\n",
- lli, PFID(fid), PFID(&lli->lli_pfid));
- unlock_new_inode(inode);
- }
-
- return inode;
-}
-
-static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
-{
- struct lmv_stripe_md *lsm = md->lmv;
- struct lu_fid *fid;
- int i;
-
- LASSERT(lsm);
- /*
- * XXX sigh, this lsm_root initialization should be in
- * LMV layer, but it needs ll_iget right now, so we
- * put this here right now.
- */
- for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
- fid = &lsm->lsm_md_oinfo[i].lmo_fid;
- LASSERT(!lsm->lsm_md_oinfo[i].lmo_root);
- /* Unfortunately ll_iget will call ll_update_inode,
- * where the initialization of slave inode is slightly
- * different, so it reset lsm_md to NULL to avoid
- * initializing lsm for slave inode.
- */
- /* For migrating inode, master stripe and master object will
- * be same, so we only need assign this inode
- */
- if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i)
- lsm->lsm_md_oinfo[i].lmo_root = inode;
- else
- lsm->lsm_md_oinfo[i].lmo_root =
- ll_iget_anon_dir(inode->i_sb, fid, md);
- if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
- int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
-
- lsm->lsm_md_oinfo[i].lmo_root = NULL;
- return rc;
- }
- }
-
- return 0;
-}
-
-static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1,
- const struct lmv_stripe_md *lsm_md2)
-{
- return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic &&
- lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count &&
- lsm_md1->lsm_md_master_mdt_index ==
- lsm_md2->lsm_md_master_mdt_index &&
- lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type &&
- lsm_md1->lsm_md_layout_version ==
- lsm_md2->lsm_md_layout_version &&
- !strcmp(lsm_md1->lsm_md_pool_name,
- lsm_md2->lsm_md_pool_name);
-}
-
-static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lmv_stripe_md *lsm = md->lmv;
- int rc;
-
- LASSERT(S_ISDIR(inode->i_mode));
- CDEBUG(D_INODE, "update lsm %p of " DFID "\n", lli->lli_lsm_md,
- PFID(ll_inode2fid(inode)));
-
- /* no striped information from request. */
- if (!lsm) {
- if (!lli->lli_lsm_md) {
- return 0;
- } else if (lli->lli_lsm_md->lsm_md_hash_type &
- LMV_HASH_FLAG_MIGRATION) {
- /*
- * migration is done, the temporay MIGRATE layout has
- * been removed
- */
- CDEBUG(D_INODE, DFID " finish migration.\n",
- PFID(ll_inode2fid(inode)));
- lmv_free_memmd(lli->lli_lsm_md);
- lli->lli_lsm_md = NULL;
- return 0;
- }
- /*
- * The lustre_md from req does not include stripeEA,
- * see ll_md_setattr
- */
- return 0;
- }
-
- /* set the directory layout */
- if (!lli->lli_lsm_md) {
- struct cl_attr *attr;
-
- rc = ll_init_lsm_md(inode, md);
- if (rc)
- return rc;
-
- /*
- * set lsm_md to NULL, so the following free lustre_md
- * will not free this lsm
- */
- md->lmv = NULL;
- lli->lli_lsm_md = lsm;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr)
- return -ENOMEM;
-
- /* validate the lsm */
- rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
- ll_md_blocking_ast);
- if (rc) {
- kfree(attr);
- return rc;
- }
-
- if (md->body->mbo_valid & OBD_MD_FLNLINK)
- md->body->mbo_nlink = attr->cat_nlink;
- if (md->body->mbo_valid & OBD_MD_FLSIZE)
- md->body->mbo_size = attr->cat_size;
- if (md->body->mbo_valid & OBD_MD_FLATIME)
- md->body->mbo_atime = attr->cat_atime;
- if (md->body->mbo_valid & OBD_MD_FLCTIME)
- md->body->mbo_ctime = attr->cat_ctime;
- if (md->body->mbo_valid & OBD_MD_FLMTIME)
- md->body->mbo_mtime = attr->cat_mtime;
-
- kfree(attr);
-
- CDEBUG(D_INODE, "Set lsm %p magic %x to " DFID "\n", lsm,
- lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
- return 0;
- }
-
- /* Compare the old and new stripe information */
- if (!lsm_md_eq(lli->lli_lsm_md, lsm)) {
- struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
- int idx;
-
- CERROR("%s: inode " DFID "(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
- inode, lsm, old_lsm,
- lsm->lsm_md_magic, old_lsm->lsm_md_magic,
- lsm->lsm_md_stripe_count,
- old_lsm->lsm_md_stripe_count,
- lsm->lsm_md_master_mdt_index,
- old_lsm->lsm_md_master_mdt_index,
- lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
- lsm->lsm_md_layout_version,
- old_lsm->lsm_md_layout_version,
- lsm->lsm_md_pool_name,
- old_lsm->lsm_md_pool_name);
-
- for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) {
- CERROR("%s: sub FIDs in old lsm idx %d, old: " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), idx,
- PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
- }
-
- for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) {
- CERROR("%s: sub FIDs in new lsm idx %d, new: " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), idx,
- PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
- }
-
- return -EIO;
- }
-
- return 0;
-}
-
-void ll_clear_inode(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- if (S_ISDIR(inode->i_mode)) {
- /* these should have been cleared in ll_file_release */
- LASSERT(!lli->lli_opendir_key);
- LASSERT(!lli->lli_sai);
- LASSERT(lli->lli_opendir_pid == 0);
- }
-
- md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
-
- LASSERT(!lli->lli_open_fd_write_count);
- LASSERT(!lli->lli_open_fd_read_count);
- LASSERT(!lli->lli_open_fd_exec_count);
-
- if (lli->lli_mds_write_och)
- ll_md_real_close(inode, FMODE_WRITE);
- if (lli->lli_mds_exec_och)
- ll_md_real_close(inode, FMODE_EXEC);
- if (lli->lli_mds_read_och)
- ll_md_real_close(inode, FMODE_READ);
-
- if (S_ISLNK(inode->i_mode)) {
- kfree(lli->lli_symlink_name);
- lli->lli_symlink_name = NULL;
- }
-
- ll_xattr_cache_destroy(inode);
-
-#ifdef CONFIG_FS_POSIX_ACL
- forget_all_cached_acls(inode);
- if (lli->lli_posix_acl) {
- posix_acl_release(lli->lli_posix_acl);
- lli->lli_posix_acl = NULL;
- }
-#endif
- lli->lli_inode_magic = LLI_INODE_DEAD;
-
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
- LASSERT(list_empty(&lli->lli_agl_list));
-
- /*
- * XXX This has to be done before lsm is freed below, because
- * cl_object still uses inode lsm.
- */
- cl_inode_fini(inode);
-}
-
-#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
-
-static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
-{
- struct lustre_md md;
- struct inode *inode = d_inode(dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *request = NULL;
- int rc, ia_valid;
-
- op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
- if (rc) {
- ptlrpc_req_finished(request);
- if (rc == -ENOENT) {
- clear_nlink(inode);
- /* Unlinked special device node? Or just a race?
- * Pretend we did everything.
- */
- if (!S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode)) {
- ia_valid = op_data->op_attr.ia_valid;
- op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
- rc = simple_setattr(dentry, &op_data->op_attr);
- op_data->op_attr.ia_valid = ia_valid;
- }
- } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
- CERROR("md_setattr fails: rc = %d\n", rc);
- }
- return rc;
- }
-
- rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
- if (rc) {
- ptlrpc_req_finished(request);
- return rc;
- }
-
- ia_valid = op_data->op_attr.ia_valid;
- /* inode size will be in cl_setattr_ost, can't do it now since dirty
- * cache is not cleared yet.
- */
- op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
- if (S_ISREG(inode->i_mode))
- inode_lock(inode);
- rc = simple_setattr(dentry, &op_data->op_attr);
- if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
- op_data->op_attr.ia_valid = ia_valid;
-
- rc = ll_update_inode(inode, &md);
- ptlrpc_req_finished(request);
-
- return rc;
-}
-
-/* If this inode has objects allocated to it (lsm != NULL), then the OST
- * object(s) determine the file size and mtime. Otherwise, the MDS will
- * keep these values until such a time that objects are allocated for it.
- * We do the MDS operations first, as it is checking permissions for us.
- * We don't to the MDS RPC if there is nothing that we want to store there,
- * otherwise there is no harm in updating mtime/atime on the MDS if we are
- * going to do an RPC anyways.
- *
- * If we are doing a truncate, we will send the mtime and ctime updates
- * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
- * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
- * at the same time.
- *
- * In case of HSMimport, we only set attr on MDS.
- */
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
-{
- struct inode *inode = d_inode(dentry);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct md_op_data *op_data = NULL;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "%s: setattr inode " DFID "(%p) from %llu to %llu, valid %x, hsm_import %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
- i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
-
- if (attr->ia_valid & ATTR_SIZE) {
- /* Check new size against VFS/VM file size limit and rlimit */
- rc = inode_newsize_ok(inode, attr->ia_size);
- if (rc)
- return rc;
-
- /* The maximum Lustre file size is variable, based on the
- * OST maximum object size and number of stripes. This
- * needs another check in addition to the VFS check above.
- */
- if (attr->ia_size > ll_file_maxbytes(inode)) {
- CDEBUG(D_INODE, "file " DFID " too large %llu > %llu\n",
- PFID(&lli->lli_fid), attr->ia_size,
- ll_file_maxbytes(inode));
- return -EFBIG;
- }
-
- attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
- }
-
- /* POSIX: check before ATTR_*TIME_SET set (from setattr_prepare) */
- if (attr->ia_valid & TIMES_SET_FLAGS) {
- if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
- !capable(CAP_FOWNER))
- return -EPERM;
- }
-
- /* We mark all of the fields "set" so MDS/OST does not re-set them */
- if (attr->ia_valid & ATTR_CTIME) {
- attr->ia_ctime = current_time(inode);
- attr->ia_valid |= ATTR_CTIME_SET;
- }
- if (!(attr->ia_valid & ATTR_ATIME_SET) &&
- (attr->ia_valid & ATTR_ATIME)) {
- attr->ia_atime = current_time(inode);
- attr->ia_valid |= ATTR_ATIME_SET;
- }
- if (!(attr->ia_valid & ATTR_MTIME_SET) &&
- (attr->ia_valid & ATTR_MTIME)) {
- attr->ia_mtime = current_time(inode);
- attr->ia_valid |= ATTR_MTIME_SET;
- }
-
- if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
- CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n",
- LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
- (s64)ktime_get_real_seconds());
-
- if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
-
- /*
- * We always do an MDS RPC, even if we're only changing the size;
- * only the MDS knows whether truncate() should fail with -ETXTBUSY
- */
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
- /*
- * If we are changing file size, file content is
- * modified, flag it.
- */
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
- op_data->op_bias |= MDS_DATA_MODIFIED;
- clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
- }
-
- op_data->op_attr = *attr;
-
- rc = ll_md_setattr(dentry, op_data);
- if (rc)
- goto out;
-
- if (!S_ISREG(inode->i_mode) || hsm_import) {
- rc = 0;
- goto out;
- }
-
- if (attr->ia_valid & (ATTR_SIZE |
- ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET)) {
- /* For truncate and utimes sending attributes to OSTs, setting
- * mtime/atime to the past will be performed under PW [0:EOF]
- * extent lock (new_size:EOF for truncate). It may seem
- * excessive to send mtime/atime updates to OSTs when not
- * setting times to past, but it is necessary due to possible
- * time de-synchronization between MDT inode and OST objects
- */
- rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0);
- }
-
- /*
- * If the file was restored, it needs to set dirty flag.
- *
- * We've already sent MDS_DATA_MODIFIED flag in
- * ll_md_setattr() for truncate. However, the MDT refuses to
- * set the HS_DIRTY flag on released files, so we have to set
- * it again if the file has been restored. Please check how
- * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini().
- *
- * Please notice that if the file is not released, the previous
- * MDS_DATA_MODIFIED has taken effect and usually
- * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()).
- * This way we can save an RPC for common open + trunc
- * operation.
- */
- if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) {
- struct hsm_state_set hss = {
- .hss_valid = HSS_SETMASK,
- .hss_setmask = HS_DIRTY,
- };
- int rc2;
-
- rc2 = ll_hsm_state_set(inode, &hss);
- /*
- * truncate and write can happen at the same time, so that
- * the file can be set modified even though the file is not
- * restored from released state, and ll_hsm_state_set() is
- * not applicable for the file, and rc2 < 0 is normal in this
- * case.
- */
- if (rc2 < 0)
- CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n",
- PFID(ll_inode2fid(inode)), rc2);
- }
-
-out:
- if (op_data)
- ll_finish_md_op_data(op_data);
-
- if (S_ISREG(inode->i_mode)) {
- inode_lock(inode);
- if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
- inode_dio_wait(inode);
- }
-
- ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ?
- LPROC_LL_TRUNC : LPROC_LL_SETATTR, 1);
-
- return rc;
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
- int mode = d_inode(de)->i_mode;
-
- if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) ==
- (ATTR_CTIME | ATTR_SIZE | ATTR_MODE))
- attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-
- if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
- (ATTR_SIZE | ATTR_MODE)) &&
- (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
- (((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
- !(attr->ia_mode & S_ISGID))))
- attr->ia_valid |= ATTR_FORCE;
-
- if ((attr->ia_valid & ATTR_MODE) &&
- (mode & S_ISUID) &&
- !(attr->ia_mode & S_ISUID) &&
- !(attr->ia_valid & ATTR_KILL_SUID))
- attr->ia_valid |= ATTR_KILL_SUID;
-
- if ((attr->ia_valid & ATTR_MODE) &&
- ((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
- !(attr->ia_mode & S_ISGID) &&
- !(attr->ia_valid & ATTR_KILL_SGID))
- attr->ia_valid |= ATTR_KILL_SGID;
-
- return ll_setattr_raw(de, attr, false);
-}
-
-int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_statfs obd_osfs;
- int rc;
-
- rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
- if (rc) {
- CERROR("md_statfs fails: rc = %d\n", rc);
- return rc;
- }
-
- osfs->os_type = sb->s_magic;
-
- CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
- osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,
- osfs->os_files);
-
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
- flags |= OBD_STATFS_NODELAY;
-
- rc = obd_statfs_rqset(sbi->ll_dt_exp, &obd_osfs, max_age, flags);
- if (rc) {
- CERROR("obd_statfs fails: rc = %d\n", rc);
- return rc;
- }
-
- CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
- obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
- obd_osfs.os_files);
-
- osfs->os_bsize = obd_osfs.os_bsize;
- osfs->os_blocks = obd_osfs.os_blocks;
- osfs->os_bfree = obd_osfs.os_bfree;
- osfs->os_bavail = obd_osfs.os_bavail;
-
- /* If we don't have as many objects free on the OST as inodes
- * on the MDS, we reduce the total number of inodes to
- * compensate, so that the "inodes in use" number is correct.
- */
- if (obd_osfs.os_ffree < osfs->os_ffree) {
- osfs->os_files = (osfs->os_files - osfs->os_ffree) +
- obd_osfs.os_ffree;
- osfs->os_ffree = obd_osfs.os_ffree;
- }
-
- return rc;
-}
-
-int ll_statfs(struct dentry *de, struct kstatfs *sfs)
-{
- struct super_block *sb = de->d_sb;
- struct obd_statfs osfs;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op: at %llu jiffies\n", get_jiffies_64());
- ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
-
- /* Some amount of caching on the client is allowed */
- rc = ll_statfs_internal(sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- 0);
- if (rc)
- return rc;
-
- statfs_unpack(sfs, &osfs);
-
- /* We need to downshift for all 32-bit kernels, because we can't
- * tell if the kernel is being called via sys_statfs64() or not.
- * Stop before overflowing f_bsize - in which case it is better
- * to just risk EOVERFLOW if caller is using old sys_statfs().
- */
- if (sizeof(long) < 8) {
- while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
- sfs->f_bsize <<= 1;
-
- osfs.os_blocks >>= 1;
- osfs.os_bfree >>= 1;
- osfs.os_bavail >>= 1;
- }
- }
-
- sfs->f_blocks = osfs.os_blocks;
- sfs->f_bfree = osfs.os_bfree;
- sfs->f_bavail = osfs.os_bavail;
- sfs->f_fsid = ll_s2sbi(sb)->ll_fsid;
- return 0;
-}
-
-void ll_inode_size_lock(struct inode *inode)
-{
- struct ll_inode_info *lli;
-
- LASSERT(!S_ISDIR(inode->i_mode));
-
- lli = ll_i2info(inode);
- mutex_lock(&lli->lli_size_mutex);
-}
-
-void ll_inode_size_unlock(struct inode *inode)
-{
- struct ll_inode_info *lli;
-
- lli = ll_i2info(inode);
- mutex_unlock(&lli->lli_size_mutex);
-}
-
-int ll_update_inode(struct inode *inode, struct lustre_md *md)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body = md->body;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- if (body->mbo_valid & OBD_MD_FLEASIZE)
- cl_file_inode_init(inode, md);
-
- if (S_ISDIR(inode->i_mode)) {
- int rc;
-
- rc = ll_update_lsm_md(inode, md);
- if (rc)
- return rc;
- }
-
-#ifdef CONFIG_FS_POSIX_ACL
- if (body->mbo_valid & OBD_MD_FLACL) {
- spin_lock(&lli->lli_lock);
- if (lli->lli_posix_acl)
- posix_acl_release(lli->lli_posix_acl);
- lli->lli_posix_acl = md->posix_acl;
- spin_unlock(&lli->lli_lock);
- }
-#endif
- inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API);
- inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
-
- if (body->mbo_valid & OBD_MD_FLATIME) {
- if (body->mbo_atime > LTIME_S(inode->i_atime))
- LTIME_S(inode->i_atime) = body->mbo_atime;
- lli->lli_atime = body->mbo_atime;
- }
- if (body->mbo_valid & OBD_MD_FLMTIME) {
- if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE,
- "setting ino %lu mtime from %lu to %llu\n",
- inode->i_ino, LTIME_S(inode->i_mtime),
- body->mbo_mtime);
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
- lli->lli_mtime = body->mbo_mtime;
- }
- if (body->mbo_valid & OBD_MD_FLCTIME) {
- if (body->mbo_ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
- lli->lli_ctime = body->mbo_ctime;
- }
- if (body->mbo_valid & OBD_MD_FLMODE)
- inode->i_mode = (inode->i_mode & S_IFMT) |
- (body->mbo_mode & ~S_IFMT);
- if (body->mbo_valid & OBD_MD_FLTYPE)
- inode->i_mode = (inode->i_mode & ~S_IFMT) |
- (body->mbo_mode & S_IFMT);
- LASSERT(inode->i_mode != 0);
- if (S_ISREG(inode->i_mode))
- inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
- LL_MAX_BLKSIZE_BITS);
- else
- inode->i_blkbits = inode->i_sb->s_blocksize_bits;
- if (body->mbo_valid & OBD_MD_FLUID)
- inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
- if (body->mbo_valid & OBD_MD_FLGID)
- inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
- if (body->mbo_valid & OBD_MD_FLFLAGS)
- inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
- if (body->mbo_valid & OBD_MD_FLNLINK)
- set_nlink(inode, body->mbo_nlink);
- if (body->mbo_valid & OBD_MD_FLRDEV)
- inode->i_rdev = old_decode_dev(body->mbo_rdev);
-
- if (body->mbo_valid & OBD_MD_FLID) {
- /* FID shouldn't be changed! */
- if (fid_is_sane(&lli->lli_fid)) {
- LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
- "Trying to change FID " DFID " to the " DFID ", inode " DFID "(%p)\n",
- PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
- PFID(ll_inode2fid(inode)), inode);
- } else {
- lli->lli_fid = body->mbo_fid1;
- }
- }
-
- LASSERT(fid_seq(&lli->lli_fid) != 0);
-
- if (body->mbo_valid & OBD_MD_FLSIZE) {
- i_size_write(inode, body->mbo_size);
-
- CDEBUG(D_VFSTRACE, "inode=" DFID ", updating i_size %llu\n",
- PFID(ll_inode2fid(inode)),
- (unsigned long long)body->mbo_size);
-
- if (body->mbo_valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->mbo_blocks;
- }
-
- if (body->mbo_valid & OBD_MD_TSTATE) {
- if (body->mbo_t_state & MS_RESTORE)
- set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
- }
-
- return 0;
-}
-
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
- struct lustre_md *md = opaque;
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(&lli->lli_fid), inode);
-
- /* Core attributes from the MDS first. This is a new inode, and
- * the VFS doesn't zero times in the core inode so we have to do
- * it ourselves. They will be overwritten by either MDS or OST
- * attributes - we just need to make sure they aren't newer.
- */
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- inode->i_rdev = 0;
- rc = ll_update_inode(inode, md);
- if (rc)
- return rc;
-
- /* OIDEBUG(inode); */
-
- if (S_ISREG(inode->i_mode)) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- inode->i_op = &ll_file_inode_operations;
- inode->i_fop = sbi->ll_fop;
- inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &ll_fast_symlink_inode_operations;
- } else {
- inode->i_op = &ll_special_inode_operations;
-
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
- }
-
- return 0;
-}
-
-void ll_delete_inode(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (S_ISREG(inode->i_mode) && lli->lli_clob)
- /* discard all dirty pages before truncating them, required by
- * osc_extent implementation at LU-1030.
- */
- cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
- CL_FSYNC_LOCAL, 1);
-
- truncate_inode_pages_final(&inode->i_data);
-
- LASSERTF(!inode->i_data.nrpages,
- "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
-
- ll_clear_inode(inode);
- clear_inode(inode);
-}
-
-int ll_iocontrol(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- int rc, flags = 0;
-
- switch (cmd) {
- case FSFILT_IOC_GETFLAGS: {
- struct mdt_body *body;
- struct md_op_data *op_data;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLFLAGS;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc) {
- CERROR("%s: failure inode " DFID ": rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
- return -abs(rc);
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- flags = body->mbo_flags;
-
- ptlrpc_req_finished(req);
-
- return put_user(flags, (int __user *)arg);
- }
- case FSFILT_IOC_SETFLAGS: {
- struct md_op_data *op_data;
- struct cl_object *obj;
- struct iattr *attr;
-
- if (get_user(flags, (int __user *)arg))
- return -EFAULT;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_attr_flags = flags;
- op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc)
- return rc;
-
- inode->i_flags = ll_ext_to_inode_flags(flags);
-
- obj = ll_i2info(inode)->lli_clob;
- if (!obj)
- return 0;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr)
- return -ENOMEM;
-
- attr->ia_valid = ATTR_ATTR_FLAG;
- rc = cl_setattr_ost(obj, attr, flags);
- kfree(attr);
- return rc;
- }
- default:
- return -ENOSYS;
- }
-
- return 0;
-}
-
-int ll_flush_ctx(struct inode *inode)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_SEC, "flush context for user %d\n",
- from_kuid(&init_user_ns, current_uid()));
-
- obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
- 0, NULL, NULL);
- obd_set_info_async(NULL, sbi->ll_dt_exp,
- sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
- 0, NULL, NULL);
- return 0;
-}
-
-/* umount -f client means force down, don't save state */
-void ll_umount_begin(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_ioctl_data *ioc_data;
- int cnt = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
- sb->s_count, atomic_read(&sb->s_active));
-
- obd = class_exp2obd(sbi->ll_md_exp);
- if (!obd) {
- CERROR("Invalid MDC connection handle %#llx\n",
- sbi->ll_md_exp->exp_handle.h_cookie);
- return;
- }
- obd->obd_force = 1;
-
- obd = class_exp2obd(sbi->ll_dt_exp);
- if (!obd) {
- CERROR("Invalid LOV connection handle %#llx\n",
- sbi->ll_dt_exp->exp_handle.h_cookie);
- return;
- }
- obd->obd_force = 1;
-
- ioc_data = kzalloc(sizeof(*ioc_data), GFP_NOFS);
- if (ioc_data) {
- obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp,
- sizeof(*ioc_data), ioc_data, NULL);
-
- obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp,
- sizeof(*ioc_data), ioc_data, NULL);
-
- kfree(ioc_data);
- }
-
- /* Really, we'd like to wait until there are no requests outstanding,
- * and then continue. For now, we just periodically checking for vfs
- * to decrement mnt_cnt and hope to finish it within 10sec.
- */
- while (cnt < 10 && !may_umount(sbi->ll_mnt.mnt)) {
- schedule_timeout_uninterruptible(HZ);
- cnt++;
- }
-
- schedule();
-}
-
-int ll_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char *profilenm = get_profile_name(sb);
- int err;
- __u32 read_only;
-
- if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
- read_only = *flags & SB_RDONLY;
- err = obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_READ_ONLY),
- KEY_READ_ONLY, sizeof(read_only),
- &read_only, NULL);
- if (err) {
- LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
- profilenm, read_only ?
- "read-only" : "read-write", err);
- return err;
- }
-
- if (read_only)
- sb->s_flags |= SB_RDONLY;
- else
- sb->s_flags &= ~SB_RDONLY;
-
- if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Remounted %s %s\n", profilenm,
- read_only ? "read-only" : "read-write");
- }
- return 0;
-}
-
-/**
- * Cleanup the open handle that is cached on MDT-side.
- *
- * For open case, the client side open handling thread may hit error
- * after the MDT grant the open. Under such case, the client should
- * send close RPC to the MDT as cleanup; otherwise, the open handle
- * on the MDT will be leaked there until the client umount or evicted.
- *
- * In further, if someone unlinked the file, because the open handle
- * holds the reference on such file/object, then it will block the
- * subsequent threads that want to locate such object via FID.
- *
- * \param[in] sb super block for this file-system
- * \param[in] open_req pointer to the original open request
- */
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
-{
- struct mdt_body *body;
- struct md_op_data *op_data;
- struct ptlrpc_request *close_req = NULL;
- struct obd_export *exp = ll_s2sbi(sb)->ll_md_exp;
-
- body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return;
-
- op_data->op_fid1 = body->mbo_fid1;
- op_data->op_handle = body->mbo_handle;
- op_data->op_mod_time = get_seconds();
- md_close(exp, op_data, NULL, &close_req);
- ptlrpc_req_finished(close_req);
- ll_finish_md_op_data(op_data);
-}
-
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *sb, struct lookup_intent *it)
-{
- struct ll_sb_info *sbi = NULL;
- struct lustre_md md = { NULL };
- int rc;
-
- LASSERT(*inode || sb);
- sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
- rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
- if (rc)
- goto cleanup;
-
- if (*inode) {
- rc = ll_update_inode(*inode, &md);
- if (rc)
- goto out;
- } else {
- LASSERT(sb);
-
- /*
- * At this point server returns to client's same fid as client
- * generated for creating. So using ->fid1 is okay here.
- */
- if (!fid_is_sane(&md.body->mbo_fid1)) {
- CERROR("%s: Fid is insane " DFID "\n",
- ll_get_fsname(sb, NULL, 0),
- PFID(&md.body->mbo_fid1));
- rc = -EINVAL;
- goto out;
- }
-
- *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &md);
- if (IS_ERR(*inode)) {
-#ifdef CONFIG_FS_POSIX_ACL
- if (md.posix_acl) {
- posix_acl_release(md.posix_acl);
- md.posix_acl = NULL;
- }
-#endif
- rc = PTR_ERR(*inode);
- CERROR("new_inode -fatal: rc %d\n", rc);
- goto out;
- }
- }
-
- /* Handling piggyback layout lock.
- * Layout lock can be piggybacked by getattr and open request.
- * The lsm can be applied to inode only if it comes with a layout lock
- * otherwise correct layout may be overwritten, for example:
- * 1. proc1: mdt returns a lsm but not granting layout
- * 2. layout was changed by another client
- * 3. proc2: refresh layout and layout lock granted
- * 4. proc1: to apply a stale layout
- */
- if (it && it->it_lock_mode != 0) {
- struct lustre_handle lockh;
- struct ldlm_lock *lock;
-
- lockh.cookie = it->it_lock_handle;
- lock = ldlm_handle2lock(&lockh);
- LASSERT(lock);
- if (ldlm_has_layout(lock)) {
- struct cl_object_conf conf;
-
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_SET;
- conf.coc_inode = *inode;
- conf.coc_lock = lock;
- conf.u.coc_layout = md.layout;
- (void)ll_layout_conf(*inode, &conf);
- }
- LDLM_LOCK_PUT(lock);
- }
-
-out:
- md_free_lustre_md(sbi->ll_md_exp, &md);
-cleanup:
- if (rc != 0 && it && it->it_op & IT_OPEN)
- ll_open_cleanup(sb ? sb : (*inode)->i_sb, req);
-
- return rc;
-}
-
-int ll_obd_statfs(struct inode *inode, void __user *arg)
-{
- struct ll_sb_info *sbi = NULL;
- struct obd_export *exp;
- char *buf = NULL;
- struct obd_ioctl_data *data = NULL;
- __u32 type;
- int len = 0, rc;
-
- if (!inode) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- sbi = ll_i2sbi(inode);
- if (!sbi) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- rc = obd_ioctl_getdata(&buf, &len, arg);
- if (rc)
- goto out_statfs;
-
- data = (void *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- !data->ioc_pbuf1 || !data->ioc_pbuf2) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- if (data->ioc_inllen1 != sizeof(__u32) ||
- data->ioc_inllen2 != sizeof(__u32) ||
- data->ioc_plen1 != sizeof(struct obd_statfs) ||
- data->ioc_plen2 != sizeof(struct obd_uuid)) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
- if (type & LL_STATFS_LMV) {
- exp = sbi->ll_md_exp;
- } else if (type & LL_STATFS_LOV) {
- exp = sbi->ll_dt_exp;
- } else {
- rc = -ENODEV;
- goto out_statfs;
- }
-
- rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL);
- if (rc)
- goto out_statfs;
-out_statfs:
- kvfree(buf);
- return rc;
-}
-
-int ll_process_config(struct lustre_cfg *lcfg)
-{
- char *ptr;
- void *sb;
- struct lprocfs_static_vars lvars;
- unsigned long x;
- int rc = 0;
-
- lprocfs_llite_init_vars(&lvars);
-
- /* The instance name contains the sb: lustre-client-aacfe000 */
- ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
- if (!ptr || !*(++ptr))
- return -EINVAL;
- rc = kstrtoul(ptr, 16, &x);
- if (rc != 0)
- return -EINVAL;
- sb = (void *)x;
- /* This better be a real Lustre superblock! */
- LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic ==
- LMD_MAGIC);
-
- /* Note we have not called client_common_fill_super yet, so
- * proc fns must be able to handle that!
- */
- rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
- lcfg, sb);
- if (rc > 0)
- rc = 0;
- return rc;
-}
-
-/* this function prepares md_op_data hint for passing ot down to MD stack. */
-struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
- struct inode *i1, struct inode *i2,
- const char *name, size_t namelen,
- u32 mode, __u32 opc, void *data)
-{
- if (!name) {
- /* Do not reuse namelen for something else. */
- if (namelen)
- return ERR_PTR(-EINVAL);
- } else {
- if (namelen > ll_i2sbi(i1)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- if (!lu_name_is_valid_2(name, namelen))
- return ERR_PTR(-EINVAL);
- }
-
- if (!op_data)
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
-
- if (!op_data)
- return ERR_PTR(-ENOMEM);
-
- ll_i2gids(op_data->op_suppgids, i1, i2);
- op_data->op_fid1 = *ll_inode2fid(i1);
- op_data->op_default_stripe_offset = -1;
- if (S_ISDIR(i1->i_mode)) {
- op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
- if (opc == LUSTRE_OPC_MKDIR)
- op_data->op_default_stripe_offset =
- ll_i2info(i1)->lli_def_stripe_offset;
- }
-
- if (i2) {
- op_data->op_fid2 = *ll_inode2fid(i2);
- if (S_ISDIR(i2->i_mode))
- op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
- } else {
- fid_zero(&op_data->op_fid2);
- }
-
- if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
- op_data->op_cli_flags |= CLI_HASH64;
-
- if (ll_need_32bit_api(ll_i2sbi(i1)))
- op_data->op_cli_flags |= CLI_API32;
-
- op_data->op_name = name;
- op_data->op_namelen = namelen;
- op_data->op_mode = mode;
- op_data->op_mod_time = ktime_get_real_seconds();
- op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
- op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = cfs_curproc_cap_pack();
- if ((opc == LUSTRE_OPC_CREATE) && name &&
- filename_is_volatile(name, namelen, &op_data->op_mds))
- op_data->op_bias |= MDS_CREATE_VOLATILE;
- else
- op_data->op_mds = 0;
- op_data->op_data = data;
-
- return op_data;
-}
-
-void ll_finish_md_op_data(struct md_op_data *op_data)
-{
- kfree(op_data);
-}
-
-int ll_show_options(struct seq_file *seq, struct dentry *dentry)
-{
- struct ll_sb_info *sbi;
-
- LASSERT(seq && dentry);
- sbi = ll_s2sbi(dentry->d_sb);
-
- if (sbi->ll_flags & LL_SBI_NOLCK)
- seq_puts(seq, ",nolock");
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- seq_puts(seq, ",flock");
-
- if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- seq_puts(seq, ",localflock");
-
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- seq_puts(seq, ",user_xattr");
-
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
- seq_puts(seq, ",lazystatfs");
-
- if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
- seq_puts(seq, ",user_fid2path");
-
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- seq_puts(seq, ",always_ping");
-
- return 0;
-}
-
-/**
- * Get obd name by cmd, and copy out to user space
- */
-int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_device *obd;
-
- if (cmd == OBD_IOC_GETDTNAME)
- obd = class_exp2obd(sbi->ll_dt_exp);
- else if (cmd == OBD_IOC_GETMDNAME)
- obd = class_exp2obd(sbi->ll_md_exp);
- else
- return -EINVAL;
-
- if (!obd)
- return -ENOENT;
-
- if (copy_to_user((void __user *)arg, obd->obd_name,
- strlen(obd->obd_name) + 1))
- return -EFAULT;
-
- return 0;
-}
-
-/**
- * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the
- * fsname will be returned in this buffer; otherwise, a static buffer will be
- * used to store the fsname and returned to caller.
- */
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
-{
- static char fsname_static[MTI_NAME_MAXLEN];
- struct lustre_sb_info *lsi = s2lsi(sb);
- char *ptr;
- int len;
-
- if (!buf) {
- /* this means the caller wants to use static buffer
- * and it doesn't care about race. Usually this is
- * in error reporting path
- */
- buf = fsname_static;
- buflen = sizeof(fsname_static);
- }
-
- len = strlen(lsi->lsi_lmd->lmd_profile);
- ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
- if (ptr && (strcmp(ptr, "-client") == 0))
- len -= 7;
-
- if (unlikely(len >= buflen))
- len = buflen - 1;
- strncpy(buf, lsi->lsi_lmd->lmd_profile, len);
- buf[len] = '\0';
-
- return buf;
-}
-
-void ll_dirty_page_discard_warn(struct page *page, int ioret)
-{
- char *buf, *path = NULL;
- struct dentry *dentry = NULL;
- struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
-
- /* this can be called inside spin lock so use GFP_ATOMIC. */
- buf = (char *)__get_free_page(GFP_ATOMIC);
- if (buf) {
- dentry = d_find_alias(page->mapping->host);
- if (dentry)
- path = dentry_path_raw(dentry, buf, PAGE_SIZE);
- }
-
- CDEBUG(D_WARNING,
- "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
- ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
- s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
- PFID(&obj->vob_header.coh_lu.loh_fid),
- (path && !IS_ERR(path)) ? path : "", ioret);
-
- if (dentry)
- dput(dentry);
-
- if (buf)
- free_page((unsigned long)buf);
-}
-
-ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
- struct lov_user_md **kbuf)
-{
- struct lov_user_md lum;
- ssize_t lum_size;
-
- if (copy_from_user(&lum, md, sizeof(lum))) {
- lum_size = -EFAULT;
- goto no_kbuf;
- }
-
- lum_size = ll_lov_user_md_size(&lum);
- if (lum_size < 0)
- goto no_kbuf;
-
- *kbuf = kzalloc(lum_size, GFP_NOFS);
- if (!*kbuf) {
- lum_size = -ENOMEM;
- goto no_kbuf;
- }
-
- if (copy_from_user(*kbuf, md, lum_size) != 0) {
- kfree(*kbuf);
- *kbuf = NULL;
- lum_size = -EFAULT;
- }
-no_kbuf:
- return lum_size;
-}
-
-/*
- * Compute llite root squash state after a change of root squash
- * configuration setting or add/remove of a lnet nid
- */
-void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
-{
- struct root_squash_info *squash = &sbi->ll_squash;
- struct lnet_process_id id;
- bool matched;
- int i;
-
- /* Update norootsquash flag */
- down_write(&squash->rsi_sem);
- if (list_empty(&squash->rsi_nosquash_nids)) {
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
- } else {
- /*
- * Do not apply root squash as soon as one of our NIDs is
- * in the nosquash_nids list
- */
- matched = false;
- i = 0;
-
- while (LNetGetId(i++, &id) != -ENOENT) {
- if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
- continue;
- if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
- matched = true;
- break;
- }
- }
- if (matched)
- sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
- else
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
- }
- up_write(&squash->rsi_sem);
-}
-
-/**
- * Parse linkea content to extract information about a given hardlink
- *
- * \param[in] ldata - Initialized linkea data
- * \param[in] linkno - Link identifier
- * \param[out] parent_fid - The entry's parent FID
- * \param[in] size - Entry name destination buffer
- *
- * \retval 0 on success
- * \retval Appropriate negative error code on failure
- */
-static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
- struct lu_fid *parent_fid, struct lu_name *ln)
-{
- unsigned int idx;
- int rc;
-
- rc = linkea_init_with_rec(ldata);
- if (rc < 0)
- return rc;
-
- if (linkno >= ldata->ld_leh->leh_reccount)
- /* beyond last link */
- return -ENODATA;
-
- linkea_first_entry(ldata);
- for (idx = 0; ldata->ld_lee; idx++) {
- linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
- parent_fid);
- if (idx == linkno)
- break;
-
- linkea_next_entry(ldata);
- }
-
- if (idx < linkno)
- return -ENODATA;
-
- return 0;
-}
-
-/**
- * Get parent FID and name of an identified link. Operation is performed for
- * a given link number, letting the caller iterate over linkno to list one or
- * all links of an entry.
- *
- * \param[in] file - File descriptor against which to perform the operation
- * \param[in,out] arg - User-filled structure containing the linkno to operate
- * on and the available size. It is eventually filled
- * with the requested information or left untouched on
- * error
- *
- * \retval - 0 on success
- * \retval - Appropriate negative error code on failure
- */
-int ll_getparent(struct file *file, struct getparent __user *arg)
-{
- struct inode *inode = file_inode(file);
- struct linkea_data *ldata;
- struct lu_fid parent_fid;
- struct lu_buf buf = {
- .lb_buf = NULL,
- .lb_len = 0
- };
- struct lu_name ln;
- u32 name_size;
- u32 linkno;
- int rc;
-
- if (!capable(CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
- return -EPERM;
-
- if (get_user(name_size, &arg->gp_name_size))
- return -EFAULT;
-
- if (get_user(linkno, &arg->gp_linkno))
- return -EFAULT;
-
- if (name_size > PATH_MAX)
- return -EINVAL;
-
- ldata = kzalloc(sizeof(*ldata), GFP_NOFS);
- if (!ldata)
- return -ENOMEM;
-
- rc = linkea_data_new(ldata, &buf);
- if (rc < 0)
- goto ldata_free;
-
- rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf,
- buf.lb_len, OBD_MD_FLXATTR);
- if (rc < 0)
- goto lb_free;
-
- rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
- if (rc < 0)
- goto lb_free;
-
- if (ln.ln_namelen >= name_size) {
- rc = -EOVERFLOW;
- goto lb_free;
- }
-
- if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) {
- rc = -EFAULT;
- goto lb_free;
- }
-
- if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) {
- rc = -EFAULT;
- goto lb_free;
- }
-
- if (put_user('\0', arg->gp_name + ln.ln_namelen)) {
- rc = -EFAULT;
- goto lb_free;
- }
-
-lb_free:
- kvfree(buf.lb_buf);
-ldata_free:
- kfree(ldata);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
deleted file mode 100644
index 214b07554e62..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ /dev/null
@@ -1,478 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-static const struct vm_operations_struct ll_file_vm_ops;
-
-void policy_from_vma(union ldlm_policy_data *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-{
- policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
- (vma->vm_pgoff << PAGE_SHIFT);
- policy->l_extent.end = (policy->l_extent.start + count - 1) |
- ~PAGE_MASK;
-}
-
-struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
- size_t count)
-{
- struct vm_area_struct *vma, *ret = NULL;
-
- /* mmap_sem must have been held by caller. */
- LASSERT(!down_write_trylock(&mm->mmap_sem));
-
- for (vma = find_vma(mm, addr);
- vma && vma->vm_start < (addr + count); vma = vma->vm_next) {
- if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
- }
- }
- return ret;
-}
-
-/**
- * API independent part for page fault initialization.
- * \param vma - virtual memory area addressed to page fault
- * \param env - corespondent lu_env to processing
- * \param index - page index corespondent to fault.
- * \parm ra_flags - vma readahead flags.
- *
- * \return error codes from cl_io_init.
- */
-static struct cl_io *
-ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
- pgoff_t index, unsigned long *ra_flags)
-{
- struct file *file = vma->vm_file;
- struct inode *inode = file_inode(file);
- struct cl_io *io;
- struct cl_fault_io *fio;
- int rc;
-
- if (ll_file_nolock(file))
- return ERR_PTR(-EOPNOTSUPP);
-
-restart:
- io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
- LASSERT(io->ci_obj);
-
- fio = &io->u.ci_fault;
- fio->ft_index = index;
- fio->ft_executable = vma->vm_flags & VM_EXEC;
-
- /*
- * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
- if (ra_flags)
- *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
- CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
- fio->ft_index, fio->ft_executable);
-
- rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
- if (rc == 0) {
- struct vvp_io *vio = vvp_env_io(env);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- LASSERT(vio->vui_cl.cis_io == io);
-
- /* mmap lock must be MANDATORY it has to cache pages. */
- io->ci_lockreq = CILR_MANDATORY;
- vio->vui_fd = fd;
- } else {
- LASSERT(rc < 0);
- cl_io_fini(env, io);
- if (io->ci_need_restart)
- goto restart;
-
- io = ERR_PTR(rc);
- }
-
- return io;
-}
-
-/* Sharing code of page_mkwrite method for rhel5 and rhel6 */
-static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
- bool *retry)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio;
- int result;
- u16 refcheck;
- sigset_t set;
- struct inode *inode;
- struct ll_inode_info *lli;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ll_fault_io_init(env, vma, vmpage->index, NULL);
- if (IS_ERR(io)) {
- result = PTR_ERR(io);
- goto out;
- }
-
- result = io->ci_result;
- if (result < 0)
- goto out_io;
-
- io->u.ci_fault.ft_mkwrite = 1;
- io->u.ci_fault.ft_writable = 1;
-
- vio = vvp_env_io(env);
- vio->u.fault.ft_vma = vma;
- vio->u.fault.ft_vmpage = vmpage;
-
- cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &set);
-
- inode = vvp_object_inode(io->ci_obj);
- lli = ll_i2info(inode);
-
- result = cl_io_loop(env, io);
-
- cfs_restore_sigs(&set);
-
- if (result == 0) {
- struct inode *inode = file_inode(vma->vm_file);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- lock_page(vmpage);
- if (!vmpage->mapping) {
- unlock_page(vmpage);
-
- /* page was truncated and lock was cancelled, return
- * ENODATA so that VM_FAULT_NOPAGE will be returned
- * to handle_mm_fault().
- */
- if (result == 0)
- result = -ENODATA;
- } else if (!PageDirty(vmpage)) {
- /* race, the page has been cleaned by ptlrpcd after
- * it was unlocked, it has to be added into dirty
- * cache again otherwise this soon-to-dirty page won't
- * consume any grants, even worse if this page is being
- * transferred because it will break RPC checksum.
- */
- unlock_page(vmpage);
-
- CDEBUG(D_MMAP,
- "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
- vmpage, vmpage->index);
-
- *retry = true;
- result = -EAGAIN;
- }
-
- if (!result)
- set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
- }
-
-out_io:
- cl_io_fini(env, io);
-out:
- cl_env_put(env, &refcheck);
- CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
- LASSERT(ergo(result == 0, PageLocked(vmpage)));
-
- return result;
-}
-
-static inline int to_fault_error(int result)
-{
- switch (result) {
- case 0:
- result = VM_FAULT_LOCKED;
- break;
- case -EFAULT:
- result = VM_FAULT_NOPAGE;
- break;
- case -ENOMEM:
- result = VM_FAULT_OOM;
- break;
- default:
- result = VM_FAULT_SIGBUS;
- break;
- }
- return result;
-}
-
-/**
- * Lustre implementation of a vm_operations_struct::fault() method, called by
- * VM to server page fault (both in kernel and user space).
- *
- * \param vma - is virtual area struct related to page fault
- * \param vmf - structure which describe type and address where hit fault
- *
- * \return allocated and filled _locked_ page for address
- * \retval VM_FAULT_ERROR on general error
- * \retval NOPAGE_OOM not have memory for allocate new page
- */
-static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio = NULL;
- struct page *vmpage;
- unsigned long ra_flags;
- int result = 0;
- int fault_ret = 0;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
- if (IS_ERR(io)) {
- result = to_fault_error(PTR_ERR(io));
- goto out;
- }
-
- result = io->ci_result;
- if (result == 0) {
- vio = vvp_env_io(env);
- vio->u.fault.ft_vma = vma;
- vio->u.fault.ft_vmpage = NULL;
- vio->u.fault.ft_vmf = vmf;
- vio->u.fault.ft_flags = 0;
- vio->u.fault.ft_flags_valid = false;
-
- /* May call ll_readpage() */
- ll_cl_add(vma->vm_file, env, io);
-
- result = cl_io_loop(env, io);
-
- ll_cl_remove(vma->vm_file, env);
-
- /* ft_flags are only valid if we reached
- * the call to filemap_fault
- */
- if (vio->u.fault.ft_flags_valid)
- fault_ret = vio->u.fault.ft_flags;
-
- vmpage = vio->u.fault.ft_vmpage;
- if (result != 0 && vmpage) {
- put_page(vmpage);
- vmf->page = NULL;
- }
- }
- cl_io_fini(env, io);
-
- vma->vm_flags |= ra_flags;
-
-out:
- cl_env_put(env, &refcheck);
- if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
- fault_ret |= to_fault_error(result);
-
- CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
- return fault_ret;
-}
-
-static int ll_fault(struct vm_fault *vmf)
-{
- int count = 0;
- bool printed = false;
- int result;
- sigset_t set;
-
- /* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite
- * so that it can be killed by admin but not cause segfault by
- * other signals.
- */
- cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &set);
-
-restart:
- result = ll_fault0(vmf->vma, vmf);
- LASSERT(!(result & VM_FAULT_LOCKED));
- if (result == 0) {
- struct page *vmpage = vmf->page;
-
- /* check if this page has been truncated */
- lock_page(vmpage);
- if (unlikely(!vmpage->mapping)) { /* unlucky */
- unlock_page(vmpage);
- put_page(vmpage);
- vmf->page = NULL;
-
- if (!printed && ++count > 16) {
- CWARN("the page is under heavy contention, maybe your app(%s) needs revising :-)\n",
- current->comm);
- printed = true;
- }
-
- goto restart;
- }
-
- result = VM_FAULT_LOCKED;
- }
- cfs_restore_sigs(&set);
- return result;
-}
-
-static int ll_page_mkwrite(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
- int count = 0;
- bool printed = false;
- bool retry;
- int result;
-
- file_update_time(vma->vm_file);
- do {
- retry = false;
- result = ll_page_mkwrite0(vma, vmf->page, &retry);
-
- if (!printed && ++count > 16) {
- const struct dentry *de = vma->vm_file->f_path.dentry;
-
- CWARN("app(%s): the page %lu of file " DFID " is under heavy contention\n",
- current->comm, vmf->pgoff,
- PFID(ll_inode2fid(de->d_inode)));
- printed = true;
- }
- } while (retry);
-
- switch (result) {
- case 0:
- LASSERT(PageLocked(vmf->page));
- result = VM_FAULT_LOCKED;
- break;
- case -ENODATA:
- case -EAGAIN:
- case -EFAULT:
- result = VM_FAULT_NOPAGE;
- break;
- case -ENOMEM:
- result = VM_FAULT_OOM;
- break;
- default:
- result = VM_FAULT_SIGBUS;
- break;
- }
-
- return result;
-}
-
-/**
- * To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count in vvp_object::vob_mmap_cnt.
- */
-static void ll_vm_open(struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct vvp_object *vob = cl_inode2vvp(inode);
-
- LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
- atomic_inc(&vob->vob_mmap_cnt);
-}
-
-/**
- * Dual to ll_vm_open().
- */
-static void ll_vm_close(struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct vvp_object *vob = cl_inode2vvp(inode);
-
- atomic_dec(&vob->vob_mmap_cnt);
- LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
-}
-
-/* XXX put nice comment here. talk about __free_pte -> dirty pages and
- * nopage's reference passing to the pte
- */
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
-{
- int rc = -ENOENT;
-
- LASSERTF(last > first, "last %llu first %llu\n", last, first);
- if (mapping_mapped(mapping)) {
- rc = 0;
- unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
- last - first + 1, 0);
- }
-
- return rc;
-}
-
-static const struct vm_operations_struct ll_file_vm_ops = {
- .fault = ll_fault,
- .page_mkwrite = ll_page_mkwrite,
- .open = ll_vm_open,
- .close = ll_vm_close,
-};
-
-int ll_file_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(file);
- int rc;
-
- if (ll_file_nolock(file))
- return -EOPNOTSUPP;
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
- vma->vm_ops = &ll_file_vm_ops;
- vma->vm_ops->open(vma);
- /* update the inode's size and mtime */
- rc = ll_glimpse_size(inode);
- }
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
deleted file mode 100644
index a6a1d80c711a..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ /dev/null
@@ -1,375 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lustre/llite/llite_nfs.c
- *
- * NFS export of Lustre Light File System
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- * Author: Huang Hua <huanghua@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include "llite_internal.h"
-#include <linux/exportfs.h>
-
-__u32 get_uuid2int(const char *name, int len)
-{
- __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
-
- while (len--) {
- __u32 key = key1 + (key0 ^ (*name++ * 7152373));
-
- if (key & 0x80000000)
- key -= 0x7fffffff;
- key1 = key0;
- key0 = key;
- }
- return (key0 << 1);
-}
-
-void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid)
-{
- __u64 key = 0, key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
-
- while (len--) {
- key = key1 + (key0 ^ (*name++ * 7152373));
- if (key & 0x8000000000000000ULL)
- key -= 0x7fffffffffffffffULL;
- key1 = key0;
- key0 = key;
- }
-
- fsid->val[0] = key;
- fsid->val[1] = key >> 32;
-}
-
-struct inode *search_inode_for_lustre(struct super_block *sb,
- const struct lu_fid *fid)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct ptlrpc_request *req = NULL;
- struct inode *inode = NULL;
- int eadatalen = 0;
- unsigned long hash = cl_fid_build_ino(fid,
- ll_need_32bit_api(sbi));
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_INFO, "searching inode for:(%lu," DFID ")\n", hash, PFID(fid));
-
- inode = ilookup5(sb, hash, ll_test_inode_by_fid, (void *)fid);
- if (inode)
- return inode;
-
- rc = ll_get_default_mdsize(sbi, &eadatalen);
- if (rc)
- return ERR_PTR(rc);
-
- /* Because inode is NULL, ll_prep_md_op_data can not
- * be used here. So we allocate op_data ourselves
- */
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return ERR_PTR(-ENOMEM);
-
- op_data->op_fid1 = *fid;
- op_data->op_mode = eadatalen;
- op_data->op_valid = OBD_MD_FLEASIZE;
-
- /* mds_fid2dentry ignores f_type */
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- kfree(op_data);
- if (rc) {
- CDEBUG(D_INFO, "can't get object attrs, fid " DFID ", rc %d\n",
- PFID(fid), rc);
- return ERR_PTR(rc);
- }
- rc = ll_prep_inode(&inode, req, sb, NULL);
- ptlrpc_req_finished(req);
- if (rc)
- return ERR_PTR(rc);
-
- return inode;
-}
-
-struct lustre_nfs_fid {
- struct lu_fid lnf_child;
- struct lu_fid lnf_parent;
-};
-
-static struct dentry *
-ll_iget_for_nfs(struct super_block *sb,
- struct lu_fid *fid, struct lu_fid *parent)
-{
- struct inode *inode;
- struct dentry *result;
-
- if (!fid_is_sane(fid))
- return ERR_PTR(-ESTALE);
-
- CDEBUG(D_INFO, "Get dentry for fid: " DFID "\n", PFID(fid));
-
- inode = search_inode_for_lustre(sb, fid);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- if (is_bad_inode(inode)) {
- /* we didn't find the right inode.. */
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
-
- result = d_obtain_alias(inode);
- if (IS_ERR(result)) {
- iput(inode);
- return result;
- }
-
- /**
- * In case d_obtain_alias() found a disconnected dentry, always update
- * lli_pfid to allow later operation (normally open) have parent fid,
- * which may be used by MDS to create data.
- */
- if (parent) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- lli->lli_pfid = *parent;
- spin_unlock(&lli->lli_lock);
- }
-
- /* N.B. d_obtain_alias() drops inode ref on error */
- result = d_obtain_alias(inode);
- if (!IS_ERR(result)) {
- /*
- * Need to signal to the ll_intent_file_open that
- * we came from NFS and so opencache needs to be
- * enabled for this one
- */
- ll_d2d(result)->lld_nfs_dentry = 1;
- }
-
- return result;
-}
-
-/**
- * \a connectable - is nfsd will connect himself or this should be done
- * at lustre
- *
- * The return value is file handle type:
- * 1 -- contains child file handle;
- * 2 -- contains child file handle and parent file handle;
- * 255 -- error.
- */
-static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
- struct inode *parent)
-{
- int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
- struct lustre_nfs_fid *nfs_fid = (void *)fh;
-
- CDEBUG(D_INFO, "%s: encoding for (" DFID ") maxlen=%d minlen=%d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), *plen, fileid_len);
-
- if (*plen < fileid_len) {
- *plen = fileid_len;
- return FILEID_INVALID;
- }
-
- nfs_fid->lnf_child = *ll_inode2fid(inode);
- if (parent)
- nfs_fid->lnf_parent = *ll_inode2fid(parent);
- else
- fid_zero(&nfs_fid->lnf_parent);
- *plen = fileid_len;
-
- return FILEID_LUSTRE;
-}
-
-static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
- int namelen, loff_t hash, u64 ino,
- unsigned int type)
-{
- /* It is hack to access lde_fid for comparison with lgd_fid.
- * So the input 'name' must be part of the 'lu_dirent'.
- */
- struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name);
- struct ll_getname_data *lgd =
- container_of(ctx, struct ll_getname_data, ctx);
- struct lu_fid fid;
-
- fid_le_to_cpu(&fid, &lde->lde_fid);
- if (lu_fid_eq(&fid, &lgd->lgd_fid)) {
- memcpy(lgd->lgd_name, name, namelen);
- lgd->lgd_name[namelen] = 0;
- lgd->lgd_found = 1;
- }
- return lgd->lgd_found;
-}
-
-static int ll_get_name(struct dentry *dentry, char *name,
- struct dentry *child)
-{
- struct inode *dir = d_inode(dentry);
- int rc;
- struct ll_getname_data lgd = {
- .lgd_name = name,
- .lgd_fid = ll_i2info(d_inode(child))->lli_fid,
- .ctx.actor = ll_nfs_get_name_filldir,
- };
- struct md_op_data *op_data;
- __u64 pos = 0;
-
- if (!dir || !S_ISDIR(dir->i_mode)) {
- rc = -ENOTDIR;
- goto out;
- }
-
- if (!dir->i_fop) {
- rc = -EINVAL;
- goto out;
- }
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
- inode_lock(dir);
- rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx);
- inode_unlock(dir);
- ll_finish_md_op_data(op_data);
- if (!rc && !lgd.lgd_found)
- rc = -ENOENT;
-out:
- return rc;
-}
-
-static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
- if (fh_type != FILEID_LUSTRE)
- return ERR_PTR(-EPROTO);
-
- return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent);
-}
-
-static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
- if (fh_type != FILEID_LUSTRE)
- return ERR_PTR(-EPROTO);
-
- return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
-}
-
-int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid)
-{
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi;
- struct mdt_body *body;
- static const char dotdot[] = "..";
- struct md_op_data *op_data;
- int rc;
- int lmmsize;
-
- LASSERT(dir && S_ISDIR(dir->i_mode));
-
- sbi = ll_s2sbi(dir->i_sb);
-
- CDEBUG(D_INFO, "%s: getting parent for (" DFID ")\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)));
-
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc != 0)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
- strlen(dotdot), lmmsize,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc) {
- CERROR("%s: failure inode " DFID " get parent: rc = %d\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)), rc);
- return rc;
- }
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- /*
- * LU-3952: MDT may lost the FID of its parent, we should not crash
- * the NFS server, ll_iget_for_nfs() will handle the error.
- */
- if (body->mbo_valid & OBD_MD_FLID) {
- CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
- PFID(ll_inode2fid(dir)), PFID(&body->mbo_fid1));
- *parent_fid = body->mbo_fid1;
- }
-
- ptlrpc_req_finished(req);
- return 0;
-}
-
-static struct dentry *ll_get_parent(struct dentry *dchild)
-{
- struct lu_fid parent_fid = { 0 };
- struct dentry *dentry;
- int rc;
-
- rc = ll_dir_get_parent_fid(dchild->d_inode, &parent_fid);
- if (rc)
- return ERR_PTR(rc);
-
- dentry = ll_iget_for_nfs(dchild->d_inode->i_sb, &parent_fid, NULL);
-
- return dentry;
-}
-
-const struct export_operations lustre_export_operations = {
- .get_parent = ll_get_parent,
- .encode_fh = ll_encode_fh,
- .get_name = ll_get_name,
- .fh_to_dentry = ll_fh_to_dentry,
- .fh_to_parent = ll_fh_to_parent,
-};
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
deleted file mode 100644
index 644bea2f9d37..000000000000
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ /dev/null
@@ -1,1684 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <lprocfs_status.h>
-#include <linux/seq_file.h>
-#include <obd_support.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/* debugfs llite mount point registration */
-static const struct file_operations ll_rw_extents_stats_fops;
-static const struct file_operations ll_rw_extents_stats_pp_fops;
-static const struct file_operations ll_rw_offset_stats_fops;
-
-static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%u\n", osfs.os_bsize);
-
- return rc;
-}
-LUSTRE_RO_ATTR(blocksize);
-
-static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_blocks;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytestotal);
-
-static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bfree;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesfree);
-
-static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bavail;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesavail);
-
-static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_files);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filestotal);
-
-static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_ffree);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filesfree);
-
-static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "local client\n");
-}
-LUSTRE_RO_ATTR(client_type);
-
-static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s\n", sbi->ll_sb->s_type->name);
-}
-LUSTRE_RO_ATTR(fstype);
-
-static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s\n", sbi->ll_sb_uuid.uuid);
-}
-LUSTRE_RO_ATTR(uuid);
-
-static int ll_site_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
-
- /*
- * See description of statistical counters in struct cl_site, and
- * struct lu_site.
- */
- return cl_site_stats_print(lu2cl_site(ll_s2sbi(sb)->ll_site), m);
-}
-
-LPROC_SEQ_FOPS_RO(ll_site_stats);
-
-static ssize_t max_read_ahead_mb_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_pages;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
-
- if (pages_number > totalram_pages / 2) {
- CERROR("can't set file readahead more than %lu MB\n",
- totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_pages = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_mb);
-
-static ssize_t max_read_ahead_per_file_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_per_file_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- if (pages_number > sbi->ll_ra_info.ra_max_pages) {
- CERROR("can't set file readahead more than max_read_ahead_mb %lu MB\n",
- sbi->ll_ra_info.ra_max_pages);
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_pages_per_file = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_per_file_mb);
-
-static ssize_t max_read_ahead_whole_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- /* Cap this at the current max readahead window size, the readahead
- * algorithm does this anyway so it's pointless to set it larger.
- */
- if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
- CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n",
- sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_SHIFT));
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_read_ahead_whole_pages = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_whole_mb);
-
-static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- int shift = 20 - PAGE_SHIFT;
- long max_cached_mb;
- long unused_mb;
-
- max_cached_mb = cache->ccc_lru_max >> shift;
- unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift;
- seq_printf(m,
- "users: %d\n"
- "max_cached_mb: %ld\n"
- "used_mb: %ld\n"
- "unused_mb: %ld\n"
- "reclaim_count: %u\n",
- atomic_read(&cache->ccc_users),
- max_cached_mb,
- max_cached_mb - unused_mb,
- unused_mb,
- cache->ccc_lru_shrinkers);
- return 0;
-}
-
-static ssize_t ll_max_cached_mb_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct super_block *sb = ((struct seq_file *)file->private_data)->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- struct lu_env *env;
- long diff = 0;
- long nrpages = 0;
- u16 refcheck;
- long pages_number;
- int mult;
- long rc;
- u64 val;
- char kernbuf[128];
-
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- mult = 1 << (20 - PAGE_SHIFT);
- buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
- kernbuf;
- rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
- if (rc)
- return rc;
-
- if (val > LONG_MAX)
- return -ERANGE;
- pages_number = (long)val;
-
- if (pages_number < 0 || pages_number > totalram_pages) {
- CERROR("%s: can't set max cache more than %lu MB\n",
- ll_get_fsname(sb, NULL, 0),
- totalram_pages >> (20 - PAGE_SHIFT));
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- diff = pages_number - cache->ccc_lru_max;
- spin_unlock(&sbi->ll_lock);
-
- /* easy - add more LRU slots. */
- if (diff >= 0) {
- atomic_long_add(diff, &cache->ccc_lru_left);
- rc = 0;
- goto out;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return 0;
-
- diff = -diff;
- while (diff > 0) {
- long tmp;
-
- /* reduce LRU budget from free slots. */
- do {
- long ov, nv;
-
- ov = atomic_long_read(&cache->ccc_lru_left);
- if (ov == 0)
- break;
-
- nv = ov > diff ? ov - diff : 0;
- rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv);
- if (likely(ov == rc)) {
- diff -= ov - nv;
- nrpages += ov - nv;
- break;
- }
- } while (1);
-
- if (diff <= 0)
- break;
-
- if (!sbi->ll_dt_exp) { /* being initialized */
- rc = 0;
- goto out;
- }
-
- /* difficult - have to ask OSCs to drop LRU slots. */
- tmp = diff << 1;
- rc = obd_set_info_async(env, sbi->ll_dt_exp,
- sizeof(KEY_CACHE_LRU_SHRINK),
- KEY_CACHE_LRU_SHRINK,
- sizeof(tmp), &tmp, NULL);
- if (rc < 0)
- break;
- }
- cl_env_put(env, &refcheck);
-
-out:
- if (rc >= 0) {
- spin_lock(&sbi->ll_lock);
- cache->ccc_lru_max = pages_number;
- spin_unlock(&sbi->ll_lock);
- rc = count;
- } else {
- atomic_long_add(nrpages, &cache->ccc_lru_left);
- }
- return rc;
-}
-
-LPROC_SEQ_FOPS(ll_max_cached_mb);
-
-static ssize_t checksum_pages_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", (sbi->ll_flags & LL_SBI_CHECKSUM) ? 1 : 0);
-}
-
-static ssize_t checksum_pages_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- if (!sbi->ll_dt_exp)
- /* Not set up yet */
- return -EAGAIN;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
- if (val)
- sbi->ll_flags |= LL_SBI_CHECKSUM;
- else
- sbi->ll_flags &= ~LL_SBI_CHECKSUM;
-
- rc = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
- KEY_CHECKSUM, sizeof(val), &val, NULL);
- if (rc)
- CWARN("Failed to set OSC checksum flags: %d\n", rc);
-
- return count;
-}
-LUSTRE_RW_ATTR(checksum_pages);
-
-static ssize_t ll_rd_track_id(struct kobject *kobj, char *buf,
- enum stats_track_type type)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- if (sbi->ll_stats_track_type == type)
- return sprintf(buf, "%d\n", sbi->ll_stats_track_id);
- else if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
- return sprintf(buf, "0 (all)\n");
- else
- return sprintf(buf, "untracked\n");
-}
-
-static ssize_t ll_wr_track_id(struct kobject *kobj, const char *buffer,
- size_t count,
- enum stats_track_type type)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pid;
-
- rc = kstrtoul(buffer, 10, &pid);
- if (rc)
- return rc;
- sbi->ll_stats_track_id = pid;
- if (pid == 0)
- sbi->ll_stats_track_type = STATS_TRACK_ALL;
- else
- sbi->ll_stats_track_type = type;
- lprocfs_clear_stats(sbi->ll_stats);
- return count;
-}
-
-static ssize_t stats_track_pid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_PID);
-}
-
-static ssize_t stats_track_pid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PID);
-}
-LUSTRE_RW_ATTR(stats_track_pid);
-
-static ssize_t stats_track_ppid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_PPID);
-}
-
-static ssize_t stats_track_ppid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PPID);
-}
-LUSTRE_RW_ATTR(stats_track_ppid);
-
-static ssize_t stats_track_gid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_GID);
-}
-
-static ssize_t stats_track_gid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_GID);
-}
-LUSTRE_RW_ATTR(stats_track_gid);
-
-static ssize_t statahead_max_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_sa_max);
-}
-
-static ssize_t statahead_max_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val <= LL_SA_RPC_MAX)
- sbi->ll_sa_max = val;
- else
- CERROR("Bad statahead_max value %lu. Valid values are in the range [0, %d]\n",
- val, LL_SA_RPC_MAX);
-
- return count;
-}
-LUSTRE_RW_ATTR(statahead_max);
-
-static ssize_t statahead_agl_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_AGL_ENABLED ? 1 : 0);
-}
-
-static ssize_t statahead_agl_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val)
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
- else
- sbi->ll_flags &= ~LL_SBI_AGL_ENABLED;
-
- return count;
-}
-LUSTRE_RW_ATTR(statahead_agl);
-
-static int ll_statahead_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- seq_printf(m,
- "statahead total: %u\n"
- "statahead wrong: %u\n"
- "agl total: %u\n",
- atomic_read(&sbi->ll_sa_total),
- atomic_read(&sbi->ll_sa_wrong),
- atomic_read(&sbi->ll_agl_total));
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ll_statahead_stats);
-
-static ssize_t lazystatfs_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_LAZYSTATFS ? 1 : 0);
-}
-
-static ssize_t lazystatfs_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val)
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
- else
- sbi->ll_flags &= ~LL_SBI_LAZYSTATFS;
-
- return count;
-}
-LUSTRE_RW_ATTR(lazystatfs);
-
-static ssize_t max_easize_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned int ealen;
- int rc;
-
- rc = ll_get_max_mdsize(sbi, &ealen);
- if (rc)
- return rc;
-
- return sprintf(buf, "%u\n", ealen);
-}
-LUSTRE_RO_ATTR(max_easize);
-
-/**
- * Get default_easize.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] kobj kernel object for sysfs tree
- * \param[in] attr attribute of this kernel object
- * \param[in] buf buffer to write data into
- *
- * \retval positive \a count on success
- * \retval negative negated errno on failure
- */
-static ssize_t default_easize_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned int ealen;
- int rc;
-
- rc = ll_get_default_mdsize(sbi, &ealen);
- if (rc)
- return rc;
-
- return sprintf(buf, "%u\n", ealen);
-}
-
-/**
- * Set default_easize.
- *
- * Range checking on the passed value is handled by
- * ll_set_default_mdsize().
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] kobj kernel object for sysfs tree
- * \param[in] attr attribute of this kernel object
- * \param[in] buffer string passed from user space
- * \param[in] count \a buffer length
- *
- * \retval positive \a count on success
- * \retval negative negated errno on failure
- */
-static ssize_t default_easize_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned long val;
- int rc;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- rc = ll_set_default_mdsize(sbi, val);
- if (rc)
- return rc;
-
- return count;
-}
-LUSTRE_RW_ATTR(default_easize);
-
-static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
-{
- const char *str[] = LL_SBI_FLAGS;
- struct super_block *sb = m->private;
- int flags = ll_s2sbi(sb)->ll_flags;
- int i = 0;
-
- while (flags != 0) {
- if (ARRAY_SIZE(str) <= i) {
- CERROR("%s: Revise array LL_SBI_FLAGS to match sbi flags please.\n",
- ll_get_fsname(sb, NULL, 0));
- return -EINVAL;
- }
-
- if (flags & 0x1)
- seq_printf(m, "%s ", str[i]);
- flags >>= 1;
- ++i;
- }
- seq_puts(m, "\b\n");
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ll_sbi_flags);
-
-static ssize_t xattr_cache_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_xattr_cache_enabled);
-}
-
-static ssize_t xattr_cache_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val != 0 && val != 1)
- return -ERANGE;
-
- if (val == 1 && !(sbi->ll_flags & LL_SBI_XATTR_CACHE))
- return -ENOTSUPP;
-
- sbi->ll_xattr_cache_enabled = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(xattr_cache);
-
-static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- long pages;
- int mb;
-
- pages = atomic_long_read(&cache->ccc_unstable_nr);
- mb = (pages * PAGE_SIZE) >> 20;
-
- seq_printf(m,
- "unstable_check: %8d\n"
- "unstable_pages: %12ld\n"
- "unstable_mb: %8d\n",
- cache->ccc_unstable_check, pages, mb);
-
- return 0;
-}
-
-static ssize_t ll_unstable_stats_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct super_block *sb = ((struct seq_file *)file->private_data)->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char kernbuf[128];
- int val, rc;
-
- if (!count)
- return 0;
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
- kernbuf;
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc < 0)
- return rc;
-
- /* borrow lru lock to set the value */
- spin_lock(&sbi->ll_cache->ccc_lru_lock);
- sbi->ll_cache->ccc_unstable_check = !!val;
- spin_unlock(&sbi->ll_cache->ccc_lru_lock);
-
- return count;
-}
-LPROC_SEQ_FOPS(ll_unstable_stats);
-
-static int ll_root_squash_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
-
- seq_printf(m, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
- return 0;
-}
-
-static ssize_t ll_root_squash_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
-
- return lprocfs_wr_root_squash(buffer, count, squash,
- ll_get_fsname(sb, NULL, 0));
-}
-LPROC_SEQ_FOPS(ll_root_squash);
-
-static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
- int len;
-
- down_read(&squash->rsi_sem);
- if (!list_empty(&squash->rsi_nosquash_nids)) {
- len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
- &squash->rsi_nosquash_nids);
- m->count += len;
- seq_puts(m, "\n");
- } else {
- seq_puts(m, "NONE\n");
- }
- up_read(&squash->rsi_sem);
-
- return 0;
-}
-
-static ssize_t ll_nosquash_nids_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
- int rc;
-
- rc = lprocfs_wr_nosquash_nids(buffer, count, squash,
- ll_get_fsname(sb, NULL, 0));
- if (rc < 0)
- return rc;
-
- ll_compute_rootsquash_state(sbi);
-
- return rc;
-}
-
-LPROC_SEQ_FOPS(ll_nosquash_nids);
-
-static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
- /* { "mntpt_path", ll_rd_path, 0, 0 }, */
- { "site", &ll_site_stats_fops, NULL, 0 },
- /* { "filegroups", lprocfs_rd_filegroups, 0, 0 }, */
- { "max_cached_mb", &ll_max_cached_mb_fops, NULL },
- { "statahead_stats", &ll_statahead_stats_fops, NULL, 0 },
- { "unstable_stats", &ll_unstable_stats_fops, NULL },
- { "sbi_flags", &ll_sbi_flags_fops, NULL, 0 },
- { .name = "root_squash",
- .fops = &ll_root_squash_fops },
- { .name = "nosquash_nids",
- .fops = &ll_nosquash_nids_fops },
- { NULL }
-};
-
-#define MAX_STRING_SIZE 128
-
-static struct attribute *llite_attrs[] = {
- &lustre_attr_blocksize.attr,
- &lustre_attr_kbytestotal.attr,
- &lustre_attr_kbytesfree.attr,
- &lustre_attr_kbytesavail.attr,
- &lustre_attr_filestotal.attr,
- &lustre_attr_filesfree.attr,
- &lustre_attr_client_type.attr,
- &lustre_attr_fstype.attr,
- &lustre_attr_uuid.attr,
- &lustre_attr_max_read_ahead_mb.attr,
- &lustre_attr_max_read_ahead_per_file_mb.attr,
- &lustre_attr_max_read_ahead_whole_mb.attr,
- &lustre_attr_checksum_pages.attr,
- &lustre_attr_stats_track_pid.attr,
- &lustre_attr_stats_track_ppid.attr,
- &lustre_attr_stats_track_gid.attr,
- &lustre_attr_statahead_max.attr,
- &lustre_attr_statahead_agl.attr,
- &lustre_attr_lazystatfs.attr,
- &lustre_attr_max_easize.attr,
- &lustre_attr_default_easize.attr,
- &lustre_attr_xattr_cache.attr,
- NULL,
-};
-
-static void llite_sb_release(struct kobject *kobj)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- complete(&sbi->ll_kobj_unregister);
-}
-
-static struct kobj_type llite_ktype = {
- .default_attrs = llite_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = llite_sb_release,
-};
-
-static const struct llite_file_opcode {
- __u32 opcode;
- __u32 type;
- const char *opname;
-} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
- /* file operation */
- { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
- { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
- { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "read_bytes" },
- { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "write_bytes" },
- { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
- "brw_read" },
- { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
- "brw_write" },
- { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
- { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
- { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" },
- { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" },
- { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" },
- { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" },
- { LPROC_LL_READDIR, LPROCFS_TYPE_REGS, "readdir" },
- /* inode operation */
- { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" },
- { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "truncate" },
- { LPROC_LL_FLOCK, LPROCFS_TYPE_REGS, "flock" },
- { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" },
- /* dir inode operation */
- { LPROC_LL_CREATE, LPROCFS_TYPE_REGS, "create" },
- { LPROC_LL_LINK, LPROCFS_TYPE_REGS, "link" },
- { LPROC_LL_UNLINK, LPROCFS_TYPE_REGS, "unlink" },
- { LPROC_LL_SYMLINK, LPROCFS_TYPE_REGS, "symlink" },
- { LPROC_LL_MKDIR, LPROCFS_TYPE_REGS, "mkdir" },
- { LPROC_LL_RMDIR, LPROCFS_TYPE_REGS, "rmdir" },
- { LPROC_LL_MKNOD, LPROCFS_TYPE_REGS, "mknod" },
- { LPROC_LL_RENAME, LPROCFS_TYPE_REGS, "rename" },
- /* special inode operation */
- { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" },
- { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" },
- { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" },
- { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" },
- { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REGS, "getxattr_hits" },
- { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" },
- { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" },
- { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" },
-};
-
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
-{
- if (!sbi->ll_stats)
- return;
- if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
- sbi->ll_stats_track_id == current->pid)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
- sbi->ll_stats_track_id == current->real_parent->pid)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
- sbi->ll_stats_track_id ==
- from_kgid(&init_user_ns, current_gid()))
- lprocfs_counter_add(sbi->ll_stats, op, count);
-}
-EXPORT_SYMBOL(ll_stats_ops_tally);
-
-static const char *ra_stat_string[] = {
- [RA_STAT_HIT] = "hits",
- [RA_STAT_MISS] = "misses",
- [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
- [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
- [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
- [RA_STAT_FAILED_MATCH] = "failed lock match",
- [RA_STAT_DISCARDED] = "read but discarded",
- [RA_STAT_ZERO_LEN] = "zero length file",
- [RA_STAT_ZERO_WINDOW] = "zero size window",
- [RA_STAT_EOF] = "read-ahead to EOF",
- [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
- [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
- [RA_STAT_FAILED_REACH_END] = "failed to reach end"
-};
-
-int ldebugfs_register_mountpoint(struct dentry *parent,
- struct super_block *sb, char *osc, char *mdc)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct dentry *dir;
- char name[MAX_STRING_SIZE + 1], *ptr;
- int err, id, len, rc;
-
- name[MAX_STRING_SIZE] = '\0';
-
- LASSERT(sbi);
- LASSERT(mdc);
- LASSERT(osc);
-
- /* Get fsname */
- len = strlen(lsi->lsi_lmd->lmd_profile);
- ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
- if (ptr && (strcmp(ptr, "-client") == 0))
- len -= 7;
-
- /* Mount info */
- snprintf(name, MAX_STRING_SIZE, "%.*s-%p", len,
- lsi->lsi_lmd->lmd_profile, sb);
-
- dir = ldebugfs_register(name, parent, NULL, NULL);
- if (IS_ERR_OR_NULL(dir)) {
- err = dir ? PTR_ERR(dir) : -ENOMEM;
- sbi->ll_debugfs_entry = NULL;
- return err;
- }
- sbi->ll_debugfs_entry = dir;
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "dump_page_cache", 0444,
- &vvp_dump_pgcache_file_ops, sbi);
- if (rc)
- CWARN("Error adding the dump_page_cache file\n");
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "extents_stats", 0644,
- &ll_rw_extents_stats_fops, sbi);
- if (rc)
- CWARN("Error adding the extent_stats file\n");
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry,
- "extents_stats_per_process",
- 0644, &ll_rw_extents_stats_pp_fops, sbi);
- if (rc)
- CWARN("Error adding the extents_stats_per_process file\n");
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "offset_stats", 0644,
- &ll_rw_offset_stats_fops, sbi);
- if (rc)
- CWARN("Error adding the offset_stats file\n");
-
- /* File operations stats */
- sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES,
- LPROCFS_STATS_FLAG_NONE);
- if (!sbi->ll_stats) {
- err = -ENOMEM;
- goto out;
- }
- /* do counter init */
- for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
- __u32 type = llite_opcode_table[id].type;
- void *ptr = NULL;
-
- if (type & LPROCFS_TYPE_REGS)
- ptr = "regs";
- else if (type & LPROCFS_TYPE_BYTES)
- ptr = "bytes";
- else if (type & LPROCFS_TYPE_PAGES)
- ptr = "pages";
- lprocfs_counter_init(sbi->ll_stats,
- llite_opcode_table[id].opcode,
- (type & LPROCFS_CNTR_AVGMINMAX),
- llite_opcode_table[id].opname, ptr);
- }
- err = ldebugfs_register_stats(sbi->ll_debugfs_entry, "stats",
- sbi->ll_stats);
- if (err)
- goto out;
-
- sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string),
- LPROCFS_STATS_FLAG_NONE);
- if (!sbi->ll_ra_stats) {
- err = -ENOMEM;
- goto out;
- }
-
- for (id = 0; id < ARRAY_SIZE(ra_stat_string); id++)
- lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
- ra_stat_string[id], "pages");
-
- err = ldebugfs_register_stats(sbi->ll_debugfs_entry, "read_ahead_stats",
- sbi->ll_ra_stats);
- if (err)
- goto out;
-
- err = ldebugfs_add_vars(sbi->ll_debugfs_entry,
- lprocfs_llite_obd_vars, sb);
- if (err)
- goto out;
-
- sbi->ll_kobj.kset = llite_kset;
- init_completion(&sbi->ll_kobj_unregister);
- err = kobject_init_and_add(&sbi->ll_kobj, &llite_ktype, NULL,
- "%s", name);
- if (err)
- goto out;
-
- /* MDC info */
- obd = class_name2obd(mdc);
-
- err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj,
- obd->obd_type->typ_name);
- if (err)
- goto out;
-
- /* OSC */
- obd = class_name2obd(osc);
-
- err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj,
- obd->obd_type->typ_name);
-out:
- if (err) {
- ldebugfs_remove(&sbi->ll_debugfs_entry);
- lprocfs_free_stats(&sbi->ll_ra_stats);
- lprocfs_free_stats(&sbi->ll_stats);
- }
- return err;
-}
-
-void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi)
-{
- if (sbi->ll_debugfs_entry) {
- ldebugfs_remove(&sbi->ll_debugfs_entry);
- kobject_put(&sbi->ll_kobj);
- wait_for_completion(&sbi->ll_kobj_unregister);
- lprocfs_free_stats(&sbi->ll_ra_stats);
- lprocfs_free_stats(&sbi->ll_stats);
- }
-}
-
-#undef MAX_STRING_SIZE
-
-#define pct(a, b) (b ? a * 100 / b : 0)
-
-static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
- struct seq_file *seq, int which)
-{
- unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
- unsigned long start, end, r, w;
- char *unitp = "KMGTPEZY";
- int i, units = 10;
- struct per_process_info *pp_info = &io_extents->pp_extents[which];
-
- read_cum = 0;
- write_cum = 0;
- start = 0;
-
- for (i = 0; i < LL_HIST_MAX; i++) {
- read_tot += pp_info->pp_r_hist.oh_buckets[i];
- write_tot += pp_info->pp_w_hist.oh_buckets[i];
- }
-
- for (i = 0; i < LL_HIST_MAX; i++) {
- r = pp_info->pp_r_hist.oh_buckets[i];
- w = pp_info->pp_w_hist.oh_buckets[i];
- read_cum += r;
- write_cum += w;
- end = 1 << (i + LL_HIST_START - units);
- seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | %14lu %4lu %4lu\n",
- start, *unitp, end, *unitp,
- (i == LL_HIST_MAX - 1) ? '+' : ' ',
- r, pct(r, read_tot), pct(read_cum, read_tot),
- w, pct(w, write_tot), pct(write_cum, write_tot));
- start = end;
- if (start == 1024) {
- start = 1;
- units += 10;
- unitp++;
- }
- if (read_cum == read_tot && write_cum == write_tot)
- break;
- }
-}
-
-static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int k;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
- seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
- "extents", "calls", "%", "cum%",
- "calls", "%", "cum%");
- spin_lock(&sbi->ll_pp_extent_lock);
- for (k = 0; k < LL_PROCESS_HIST_MAX; k++) {
- if (io_extents->pp_extents[k].pid != 0) {
- seq_printf(seq, "\nPID: %d\n",
- io_extents->pp_extents[k].pid);
- ll_display_extents_info(io_extents, seq, k);
- }
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
- return 0;
-}
-
-static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
- const char __user *buf,
- size_t len,
- loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int i;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- io_extents->pp_extents[i].pid = 0;
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_extents_stats_pp);
-
-static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (u64)now.tv_sec, (unsigned long)now.tv_nsec);
-
- seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
- seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
- "extents", "calls", "%", "cum%",
- "calls", "%", "cum%");
- spin_lock(&sbi->ll_lock);
- ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
- spin_unlock(&sbi->ll_lock);
-
- return 0;
-}
-
-static ssize_t ll_rw_extents_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int i;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- io_extents->pp_extents[i].pid = 0;
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_extents_stats);
-
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw)
-{
- int i, cur = -1;
- struct ll_rw_process_info *process;
- struct ll_rw_process_info *offset;
- int *off_count = &sbi->ll_rw_offset_entry_count;
- int *process_count = &sbi->ll_offset_process_count;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
-
- if (!sbi->ll_rw_stats_on)
- return;
- process = sbi->ll_rw_process_info;
- offset = sbi->ll_rw_offset_info;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- /* Extent statistics */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (io_extents->pp_extents[i].pid == pid) {
- cur = i;
- break;
- }
- }
-
- if (cur == -1) {
- /* new process */
- sbi->ll_extent_process_count =
- (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
- cur = sbi->ll_extent_process_count;
- io_extents->pp_extents[cur].pid = pid;
- lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
- }
-
- for (i = 0; (count >= (1 << LL_HIST_START << i)) &&
- (i < (LL_HIST_MAX - 1)); i++)
- ;
- if (rw == 0) {
- io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
- io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
- } else {
- io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
- io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
-
- spin_lock(&sbi->ll_process_lock);
- /* Offset statistics */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (process[i].rw_pid == pid) {
- if (process[i].rw_last_file != file) {
- process[i].rw_range_start = pos;
- process[i].rw_last_file_pos = pos + count;
- process[i].rw_smallest_extent = count;
- process[i].rw_largest_extent = count;
- process[i].rw_offset = 0;
- process[i].rw_last_file = file;
- spin_unlock(&sbi->ll_process_lock);
- return;
- }
- if (process[i].rw_last_file_pos != pos) {
- *off_count =
- (*off_count + 1) % LL_OFFSET_HIST_MAX;
- offset[*off_count].rw_op = process[i].rw_op;
- offset[*off_count].rw_pid = pid;
- offset[*off_count].rw_range_start =
- process[i].rw_range_start;
- offset[*off_count].rw_range_end =
- process[i].rw_last_file_pos;
- offset[*off_count].rw_smallest_extent =
- process[i].rw_smallest_extent;
- offset[*off_count].rw_largest_extent =
- process[i].rw_largest_extent;
- offset[*off_count].rw_offset =
- process[i].rw_offset;
- process[i].rw_op = rw;
- process[i].rw_range_start = pos;
- process[i].rw_smallest_extent = count;
- process[i].rw_largest_extent = count;
- process[i].rw_offset = pos -
- process[i].rw_last_file_pos;
- }
- if (process[i].rw_smallest_extent > count)
- process[i].rw_smallest_extent = count;
- if (process[i].rw_largest_extent < count)
- process[i].rw_largest_extent = count;
- process[i].rw_last_file_pos = pos + count;
- spin_unlock(&sbi->ll_process_lock);
- return;
- }
- }
- *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
- process[*process_count].rw_pid = pid;
- process[*process_count].rw_op = rw;
- process[*process_count].rw_range_start = pos;
- process[*process_count].rw_last_file_pos = pos + count;
- process[*process_count].rw_smallest_extent = count;
- process[*process_count].rw_largest_extent = count;
- process[*process_count].rw_offset = 0;
- process[*process_count].rw_last_file = file;
- spin_unlock(&sbi->ll_process_lock);
-}
-
-static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
- struct ll_rw_process_info *process = sbi->ll_rw_process_info;
- int i;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- spin_lock(&sbi->ll_process_lock);
-
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
- "R/W", "PID", "RANGE START", "RANGE END",
- "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
- /* We stored the discontiguous offsets here; print them first */
- for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
- if (offset[i].rw_pid != 0)
- seq_printf(seq,
- "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
- offset[i].rw_op == READ ? 'R' : 'W',
- offset[i].rw_pid,
- offset[i].rw_range_start,
- offset[i].rw_range_end,
- (unsigned long)offset[i].rw_smallest_extent,
- (unsigned long)offset[i].rw_largest_extent,
- offset[i].rw_offset);
- }
- /* Then print the current offsets for each process */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (process[i].rw_pid != 0)
- seq_printf(seq,
- "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
- process[i].rw_op == READ ? 'R' : 'W',
- process[i].rw_pid,
- process[i].rw_range_start,
- process[i].rw_last_file_pos,
- (unsigned long)process[i].rw_smallest_extent,
- (unsigned long)process[i].rw_largest_extent,
- process[i].rw_offset);
- }
- spin_unlock(&sbi->ll_process_lock);
-
- return 0;
-}
-
-static ssize_t ll_rw_offset_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_process_info *process_info = sbi->ll_rw_process_info;
- struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
-
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_process_lock);
- sbi->ll_offset_process_count = 0;
- sbi->ll_rw_offset_entry_count = 0;
- memset(process_info, 0, sizeof(struct ll_rw_process_info) *
- LL_PROCESS_HIST_MAX);
- memset(offset_info, 0, sizeof(struct ll_rw_process_info) *
- LL_OFFSET_HIST_MAX);
- spin_unlock(&sbi->ll_process_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_offset_stats);
-
-void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->obd_vars = lprocfs_llite_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
deleted file mode 100644
index 6c9ec462eb41..000000000000
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ /dev/null
@@ -1,1202 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/security.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-static int ll_create_it(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it);
-
-/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
-static int ll_test_inode(struct inode *inode, void *opaque)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lustre_md *md = opaque;
-
- if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) {
- CERROR("MDS body missing FID\n");
- return 0;
- }
-
- if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1))
- return 0;
-
- return 1;
-}
-
-static int ll_set_inode(struct inode *inode, void *opaque)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body = ((struct lustre_md *)opaque)->body;
-
- if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) {
- CERROR("MDS body missing FID\n");
- return -EINVAL;
- }
-
- lli->lli_fid = body->mbo_fid1;
- if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) {
- CERROR("Can not initialize inode " DFID
- " without object type: valid = %#llx\n",
- PFID(&lli->lli_fid), body->mbo_valid);
- return -EINVAL;
- }
-
- inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT);
- if (unlikely(inode->i_mode == 0)) {
- CERROR("Invalid inode " DFID " type\n", PFID(&lli->lli_fid));
- return -EINVAL;
- }
-
- ll_lli_init(lli);
-
- return 0;
-}
-
-/**
- * Get an inode by inode number(@hash), which is already instantiated by
- * the intent lookup).
- */
-struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct lustre_md *md)
-{
- struct inode *inode;
- int rc = 0;
-
- LASSERT(hash != 0);
- inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- if (inode->i_state & I_NEW) {
- rc = ll_read_inode2(inode, md);
- if (!rc && S_ISREG(inode->i_mode) &&
- !ll_i2info(inode)->lli_clob)
- rc = cl_file_inode_init(inode, md);
-
- if (rc) {
- /*
- * Let's clear directory lsm here, otherwise
- * make_bad_inode() will reset the inode mode
- * to regular, then ll_clear_inode will not
- * be able to clear lsm_md
- */
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- make_bad_inode(inode);
- unlock_new_inode(inode);
- iput(inode);
- inode = ERR_PTR(rc);
- } else {
- unlock_new_inode(inode);
- }
- } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
- rc = ll_update_inode(inode, md);
- CDEBUG(D_VFSTRACE, "got inode: " DFID "(%p): rc = %d\n",
- PFID(&md->body->mbo_fid1), inode, rc);
- if (rc) {
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- iput(inode);
- inode = ERR_PTR(rc);
- }
- }
- return inode;
-}
-
-static void ll_invalidate_negative_children(struct inode *dir)
-{
- struct dentry *dentry, *tmp_subdir;
-
- spin_lock(&dir->i_lock);
- hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) {
- spin_lock(&dentry->d_lock);
- if (!list_empty(&dentry->d_subdirs)) {
- struct dentry *child;
-
- list_for_each_entry_safe(child, tmp_subdir,
- &dentry->d_subdirs,
- d_child) {
- if (d_really_is_negative(child))
- d_lustre_invalidate(child, 1);
- }
- }
- spin_unlock(&dentry->d_lock);
- }
- spin_unlock(&dir->i_lock);
-}
-
-int ll_test_inode_by_fid(struct inode *inode, void *opaque)
-{
- return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
-}
-
-int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- struct lustre_handle lockh;
- int rc;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (rc < 0) {
- CDEBUG(D_INODE, "ldlm_cli_cancel: rc = %d\n", rc);
- return rc;
- }
- break;
- case LDLM_CB_CANCELING: {
- struct inode *inode = ll_inode_from_resource_lock(lock);
- __u64 bits = lock->l_policy_data.l_inodebits.bits;
-
- /* Inode is set to lock->l_resource->lr_lvb_inode
- * for mdc - bug 24555
- */
- LASSERT(!lock->l_ast_data);
-
- if (!inode)
- break;
-
- /* Invalidate all dentries associated with this inode */
- LASSERT(ldlm_is_canceling(lock));
-
- if (!fid_res_name_eq(ll_inode2fid(inode),
- &lock->l_resource->lr_name)) {
- LDLM_ERROR(lock,
- "data mismatch with object " DFID "(%p)",
- PFID(ll_inode2fid(inode)), inode);
- LBUG();
- }
-
- if (bits & MDS_INODELOCK_XATTR) {
- if (S_ISDIR(inode->i_mode))
- ll_i2info(inode)->lli_def_stripe_offset = -1;
- ll_xattr_cache_destroy(inode);
- bits &= ~MDS_INODELOCK_XATTR;
- }
-
- /* For OPEN locks we differentiate between lock modes
- * LCK_CR, LCK_CW, LCK_PR - bug 22891
- */
- if (bits & MDS_INODELOCK_OPEN)
- ll_have_md_lock(inode, &bits, lock->l_req_mode);
-
- if (bits & MDS_INODELOCK_OPEN) {
- fmode_t fmode;
-
- switch (lock->l_req_mode) {
- case LCK_CW:
- fmode = FMODE_WRITE;
- break;
- case LCK_PR:
- fmode = FMODE_EXEC;
- break;
- case LCK_CR:
- fmode = FMODE_READ;
- break;
- default:
- LDLM_ERROR(lock, "bad lock mode for OPEN lock");
- LBUG();
- }
-
- ll_md_real_close(inode, fmode);
- }
-
- if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
- MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM))
- ll_have_md_lock(inode, &bits, LCK_MINMODE);
-
- if (bits & MDS_INODELOCK_LAYOUT) {
- struct cl_object_conf conf = {
- .coc_opc = OBJECT_CONF_INVALIDATE,
- .coc_inode = inode,
- };
-
- rc = ll_layout_conf(inode, &conf);
- if (rc < 0)
- CDEBUG(D_INODE, "cannot invalidate layout of "
- DFID ": rc = %d\n",
- PFID(ll_inode2fid(inode)), rc);
- }
-
- if (bits & MDS_INODELOCK_UPDATE) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- spin_unlock(&lli->lli_lock);
- }
-
- if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CDEBUG(D_INODE, "invalidating inode " DFID " lli = %p, pfid = " DFID "\n",
- PFID(ll_inode2fid(inode)), lli,
- PFID(&lli->lli_pfid));
-
- truncate_inode_pages(inode->i_mapping, 0);
-
- if (unlikely(!fid_is_zero(&lli->lli_pfid))) {
- struct inode *master_inode = NULL;
- unsigned long hash;
-
- /*
- * This is slave inode, since all of the child
- * dentry is connected on the master inode, so
- * we have to invalidate the negative children
- * on master inode
- */
- CDEBUG(D_INODE,
- "Invalidate s" DFID " m" DFID "\n",
- PFID(ll_inode2fid(inode)),
- PFID(&lli->lli_pfid));
-
- hash = cl_fid_build_ino(&lli->lli_pfid,
- ll_need_32bit_api(ll_i2sbi(inode)));
- /*
- * Do not lookup the inode with ilookup5,
- * otherwise it will cause dead lock,
- *
- * 1. Client1 send chmod req to the MDT0, then
- * on MDT0, it enqueues master and all of its
- * slaves lock, (mdt_attr_set() ->
- * mdt_lock_slaves()), after gets master and
- * stripe0 lock, it will send the enqueue req
- * (for stripe1) to MDT1, then MDT1 finds the
- * lock has been granted to client2. Then MDT1
- * sends blocking ast to client2.
- *
- * 2. At the same time, client2 tries to unlink
- * the striped dir (rm -rf striped_dir), and
- * during lookup, it will hold the master inode
- * of the striped directory, whose inode state
- * is NEW, then tries to revalidate all of its
- * slaves, (ll_prep_inode()->ll_iget()->
- * ll_read_inode2()-> ll_update_inode().). And
- * it will be blocked on the server side because
- * of 1.
- *
- * 3. Then the client get the blocking_ast req,
- * cancel the lock, but being blocked if using
- * ->ilookup5()), because master inode state is
- * NEW.
- */
- master_inode = ilookup5_nowait(inode->i_sb,
- hash,
- ll_test_inode_by_fid,
- (void *)&lli->lli_pfid);
- if (master_inode) {
- ll_invalidate_negative_children(master_inode);
- iput(master_inode);
- }
- } else {
- ll_invalidate_negative_children(inode);
- }
- }
-
- if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
- inode->i_sb->s_root &&
- !is_root_inode(inode))
- ll_invalidate_aliases(inode);
-
- iput(inode);
- break;
- }
- default:
- LBUG();
- }
-
- return 0;
-}
-
-__u32 ll_i2suppgid(struct inode *i)
-{
- if (in_group_p(i->i_gid))
- return (__u32)from_kgid(&init_user_ns, i->i_gid);
- else
- return (__u32)(-1);
-}
-
-/* Pack the required supplementary groups into the supplied groups array.
- * If we don't need to use the groups from the target inode(s) then we
- * instead pack one or more groups from the user's supplementary group
- * array in case it might be useful. Not needed if doing an MDS-side upcall.
- */
-void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
-{
- LASSERT(i1);
-
- suppgids[0] = ll_i2suppgid(i1);
-
- if (i2)
- suppgids[1] = ll_i2suppgid(i2);
- else
- suppgids[1] = -1;
-}
-
-/*
- * Try to reuse unhashed or invalidated dentries.
- * This is very similar to d_exact_alias(), and any changes in one should be
- * considered for inclusion in the other. The differences are that we don't
- * need an unhashed alias, and we don't want d_compare to be used for
- * comparison.
- */
-static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
-{
- struct dentry *alias;
-
- if (hlist_empty(&inode->i_dentry))
- return NULL;
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
- LASSERT(alias != dentry);
- /*
- * Don't need alias->d_lock here, because aliases with
- * d_parent == entry->d_parent are not subject to name or
- * parent changes, because the parent inode i_mutex is held.
- */
-
- if (alias->d_parent != dentry->d_parent)
- continue;
- if (alias->d_name.hash != dentry->d_name.hash)
- continue;
- if (alias->d_name.len != dentry->d_name.len ||
- memcmp(alias->d_name.name, dentry->d_name.name,
- dentry->d_name.len) != 0)
- continue;
- spin_lock(&alias->d_lock);
- dget_dlock(alias);
- spin_unlock(&alias->d_lock);
- spin_unlock(&inode->i_lock);
- return alias;
- }
- spin_unlock(&inode->i_lock);
-
- return NULL;
-}
-
-/*
- * Similar to d_splice_alias(), but lustre treats invalid alias
- * similar to DCACHE_DISCONNECTED, and tries to use it anyway.
- */
-struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
-{
- if (inode && !S_ISDIR(inode->i_mode)) {
- struct dentry *new = ll_find_alias(inode, de);
-
- if (new) {
- d_move(new, de);
- iput(inode);
- CDEBUG(D_DENTRY,
- "Reuse dentry %p inode %p refc %d flags %#x\n",
- new, d_inode(new), d_count(new), new->d_flags);
- return new;
- }
- d_add(de, inode);
- } else {
- struct dentry *new = d_splice_alias(inode, de);
-
- if (new)
- de = new;
- }
- CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
- de, d_inode(de), d_count(de), de->d_flags);
- return de;
-}
-
-static int ll_lookup_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it,
- struct inode *parent, struct dentry **de)
-{
- struct inode *inode = NULL;
- __u64 bits = 0;
- int rc = 0;
- struct dentry *alias;
-
- /* NB 1 request reference will be taken away by ll_intent_lock()
- * when I return
- */
- CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
- it->it_disposition);
- if (!it_disposition(it, DISP_LOOKUP_NEG)) {
- rc = ll_prep_inode(&inode, request, (*de)->d_sb, it);
- if (rc)
- return rc;
-
- ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
-
- /* We used to query real size from OSTs here, but actually
- * this is not needed. For stat() calls size would be updated
- * from subsequent do_revalidate()->ll_inode_revalidate_it() in
- * 2.4 and
- * vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
- * Everybody else who needs correct file size would call
- * ll_glimpse_size or some equivalent themselves anyway.
- * Also see bug 7198.
- */
- }
-
- alias = ll_splice_alias(inode, *de);
- if (IS_ERR(alias)) {
- rc = PTR_ERR(alias);
- goto out;
- }
- *de = alias;
-
- if (!it_disposition(it, DISP_LOOKUP_NEG)) {
- /* We have the "lookup" lock, so unhide dentry */
- if (bits & MDS_INODELOCK_LOOKUP)
- d_lustre_revalidate(*de);
- } else if (!it_disposition(it, DISP_OPEN_CREATE)) {
- /* If file created on server, don't depend on parent UPDATE
- * lock to unhide it. It is left hidden and next lookup can
- * find it in ll_splice_alias.
- */
- /* Check that parent has UPDATE lock. */
- struct lookup_intent parent_it = {
- .it_op = IT_GETATTR,
- .it_lock_handle = 0 };
- struct lu_fid fid = ll_i2info(parent)->lli_fid;
-
- /* If it is striped directory, get the real stripe parent */
- if (unlikely(ll_i2info(parent)->lli_lsm_md)) {
- rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
- ll_i2info(parent)->lli_lsm_md,
- (*de)->d_name.name,
- (*de)->d_name.len, &fid);
- if (rc)
- return rc;
- }
-
- if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid,
- NULL)) {
- d_lustre_revalidate(*de);
- ll_intent_release(&parent_it);
- }
- }
-
-out:
- if (rc != 0 && it->it_op & IT_OPEN)
- ll_open_cleanup((*de)->d_sb, request);
-
- return rc;
-}
-
-static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
- struct lookup_intent *it)
-{
- struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
- struct dentry *save = dentry, *retval;
- struct ptlrpc_request *req = NULL;
- struct md_op_data *op_data = NULL;
- struct inode *inode;
- __u32 opc;
- int rc;
-
- if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),intent=%s\n",
- dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
-
- if (d_mountpoint(dentry))
- CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
-
- if (!it || it->it_op == IT_GETXATTR)
- it = &lookup_it;
-
- if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) {
- rc = ll_statahead(parent, &dentry, 0);
- if (rc == 1) {
- if (dentry == save)
- retval = NULL;
- else
- retval = dentry;
- goto out;
- }
- }
-
- if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE && sb_rdonly(dentry->d_sb))
- return ERR_PTR(-EROFS);
-
- if (it->it_op & IT_CREAT)
- opc = LUSTRE_OPC_CREATE;
- else
- opc = LUSTRE_OPC_ANY;
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
- dentry->d_name.len, 0, opc, NULL);
- if (IS_ERR(op_data))
- return (void *)op_data;
-
- /* enforce umask if acl disabled or MDS doesn't support umask */
- if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
- it->it_create_mode &= ~current_umask();
-
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- &ll_md_blocking_ast, 0);
- /*
- * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the
- * client does not know which suppgid should be sent to the MDS, or
- * some other(s) changed the target file's GID after this RPC sent
- * to the MDS with the suppgid as the original GID, then we should
- * try again with right suppgid.
- */
- if (rc == -EACCES && it->it_op & IT_OPEN &&
- it_disposition(it, DISP_OPEN_DENY)) {
- struct mdt_body *body;
-
- LASSERT(req);
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (op_data->op_suppgids[0] == body->mbo_gid ||
- op_data->op_suppgids[1] == body->mbo_gid ||
- !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) {
- retval = ERR_PTR(-EACCES);
- goto out;
- }
-
- fid_zero(&op_data->op_fid2);
- op_data->op_suppgids[1] = body->mbo_gid;
- ptlrpc_req_finished(req);
- req = NULL;
- ll_intent_release(it);
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- ll_md_blocking_ast, 0);
- }
-
- if (rc < 0) {
- retval = ERR_PTR(rc);
- goto out;
- }
-
- rc = ll_lookup_it_finish(req, it, parent, &dentry);
- if (rc != 0) {
- ll_intent_release(it);
- retval = ERR_PTR(rc);
- goto out;
- }
-
- inode = d_inode(dentry);
- if ((it->it_op & IT_OPEN) && inode &&
- !S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode)) {
- ll_release_openhandle(inode, it);
- }
- ll_lookup_finish_locks(it, inode);
-
- if (dentry == save)
- retval = NULL;
- else
- retval = dentry;
-out:
- if (op_data && !IS_ERR(op_data))
- ll_finish_md_op_data(op_data);
-
- ptlrpc_req_finished(req);
- return retval;
-}
-
-static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
- unsigned int flags)
-{
- struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
- struct dentry *de;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),flags=%u\n",
- dentry, PFID(ll_inode2fid(parent)), parent, flags);
-
- /* Optimize away (CREATE && !OPEN). Let .create handle the race.
- * but only if we have write permissions there, otherwise we need
- * to proceed with lookup. LU-4185
- */
- if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
- (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0))
- return NULL;
-
- if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
- itp = NULL;
- else
- itp = &it;
- de = ll_lookup_it(parent, dentry, itp);
-
- if (itp)
- ll_intent_release(itp);
-
- return de;
-}
-
-/*
- * For cached negative dentry and new dentry, handle lookup/create/open
- * together.
- */
-static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned int open_flags,
- umode_t mode, int *opened)
-{
- struct lookup_intent *it;
- struct dentry *de;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),file %p,open_flags %x,mode %x opened %d\n",
- dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
- *opened);
-
- /* Only negative dentries enter here */
- LASSERT(!d_inode(dentry));
-
- if (!d_in_lookup(dentry)) {
- /* A valid negative dentry that just passed revalidation,
- * there's little point to try and open it server-side,
- * even though there's a minuscle chance it might succeed.
- * Either way it's a valid race to just return -ENOENT here.
- */
- if (!(open_flags & O_CREAT))
- return -ENOENT;
-
- /* Otherwise we just unhash it to be rehashed afresh via
- * lookup if necessary
- */
- d_drop(dentry);
- }
-
- it = kzalloc(sizeof(*it), GFP_NOFS);
- if (!it)
- return -ENOMEM;
-
- it->it_op = IT_OPEN;
- if (open_flags & O_CREAT)
- it->it_op |= IT_CREAT;
- it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
- it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
- it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
-
- /* Dentry added to dcache tree in ll_lookup_it */
- de = ll_lookup_it(dir, dentry, it);
- if (IS_ERR(de))
- rc = PTR_ERR(de);
- else if (de)
- dentry = de;
-
- if (!rc) {
- if (it_disposition(it, DISP_OPEN_CREATE)) {
- /* Dentry instantiated in ll_create_it. */
- rc = ll_create_it(dir, dentry, it);
- if (rc) {
- /* We dget in ll_splice_alias. */
- if (de)
- dput(de);
- goto out_release;
- }
-
- *opened |= FILE_CREATED;
- }
- if (d_really_is_positive(dentry) &&
- it_disposition(it, DISP_OPEN_OPEN)) {
- /* Open dentry. */
- if (S_ISFIFO(d_inode(dentry)->i_mode)) {
- /* We cannot call open here as it might
- * deadlock. This case is unreachable in
- * practice because of OBD_CONNECT_NODEVOH.
- */
- rc = finish_no_open(file, de);
- } else {
- file->private_data = it;
- rc = finish_open(file, dentry, NULL, opened);
- /* We dget in ll_splice_alias. finish_open takes
- * care of dget for fd open.
- */
- if (de)
- dput(de);
- }
- } else {
- rc = finish_no_open(file, de);
- }
- }
-
-out_release:
- ll_intent_release(it);
- kfree(it);
-
- return rc;
-}
-
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
-{
- struct inode *inode = NULL;
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int rc;
-
- LASSERT(it && it->it_disposition);
-
- LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
- request = it->it_request;
- it_clear_disposition(it, DISP_ENQ_CREATE_REF);
- rc = ll_prep_inode(&inode, request, dir->i_sb, it);
- if (rc) {
- inode = ERR_PTR(rc);
- goto out;
- }
-
- LASSERT(hlist_empty(&inode->i_dentry));
-
- /* We asked for a lock on the directory, but were granted a
- * lock on the inode. Since we finally have an inode pointer,
- * stuff it in the lock.
- */
- CDEBUG(D_DLMTRACE, "setting l_ast_data to inode " DFID "(%p)\n",
- PFID(ll_inode2fid(dir)), inode);
- ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
- out:
- ptlrpc_req_finished(request);
- return inode;
-}
-
-/*
- * By the time this is called, we already have created the directory cache
- * entry for the new file, but it is so far negative - it has no inode.
- *
- * We defer creating the OBD object(s) until open, to keep the intent and
- * non-intent code paths similar, and also because we do not have the MDS
- * inode number before calling ll_create_node() (which is needed for LOV),
- * so we would need to do yet another RPC to the MDS to store the LOV EA
- * data on the MDS. If needed, we would pass the PACKED lmm as data and
- * lmm_size in datalen (the MDS still has code which will handle that).
- *
- * If the create succeeds, we fill in the inode information
- * with d_instantiate().
- */
-static int ll_create_it(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it)
-{
- struct inode *inode;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p), intent=%s\n",
- dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
-
- rc = it_open_error(DISP_OPEN_CREATE, it);
- if (rc)
- return rc;
-
- inode = ll_create_node(dir, it);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- d_instantiate(dentry, inode);
-
- return ll_init_security(dentry, inode, dir);
-}
-
-void ll_update_times(struct ptlrpc_request *request, struct inode *inode)
-{
- struct mdt_body *body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
-
- LASSERT(body);
- if (body->mbo_valid & OBD_MD_FLMTIME &&
- body->mbo_mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE, "setting fid " DFID " mtime from %lu to %llu\n",
- PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
- body->mbo_mtime);
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
- if (body->mbo_valid & OBD_MD_FLCTIME &&
- body->mbo_ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
-}
-
-static int ll_new_node(struct inode *dir, struct dentry *dentry,
- const char *tgt, umode_t mode, int rdev,
- __u32 opc)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- struct inode *inode = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int tgt_len = 0;
- int err;
-
- if (unlikely(tgt))
- tgt_len = strlen(tgt) + 1;
-again:
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
- 0, opc, NULL);
- if (IS_ERR(op_data)) {
- err = PTR_ERR(op_data);
- goto err_exit;
- }
-
- err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, current_fsgid()),
- cfs_curproc_cap_pack(), rdev, &request);
- ll_finish_md_op_data(op_data);
- if (err < 0 && err != -EREMOTE)
- goto err_exit;
-
- /*
- * If the client doesn't know where to create a subdirectory (or
- * in case of a race that sends the RPC to the wrong MDS), the
- * MDS will return -EREMOTE and the client will fetch the layout
- * of the directory, then create the directory on the right MDT.
- */
- if (unlikely(err == -EREMOTE)) {
- struct ll_inode_info *lli = ll_i2info(dir);
- struct lmv_user_md *lum;
- int lumsize, err2;
-
- ptlrpc_req_finished(request);
- request = NULL;
-
- err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
- OBD_MD_DEFAULT_MEA);
- if (!err2) {
- /* Update stripe_offset and retry */
- lli->lli_def_stripe_offset = lum->lum_stripe_offset;
- } else if (err2 == -ENODATA &&
- lli->lli_def_stripe_offset != -1) {
- /*
- * If there are no default stripe EA on the MDT, but the
- * client has default stripe, then it probably means
- * default stripe EA has just been deleted.
- */
- lli->lli_def_stripe_offset = -1;
- } else {
- goto err_exit;
- }
-
- ptlrpc_req_finished(request);
- request = NULL;
- goto again;
- }
-
- ll_update_times(request, dir);
-
- err = ll_prep_inode(&inode, request, dir->i_sb, NULL);
- if (err)
- goto err_exit;
-
- d_instantiate(dentry, inode);
-
- err = ll_init_security(dentry, inode, dir);
-err_exit:
- if (request)
- ptlrpc_req_finished(request);
-
- return err;
-}
-
-static int ll_mknod(struct inode *dir, struct dentry *dchild,
- umode_t mode, dev_t rdev)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p) mode %o dev %x\n",
- dchild, PFID(ll_inode2fid(dir)), dir, mode,
- old_encode_dev(rdev));
-
- if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
- mode &= ~current_umask();
-
- switch (mode & S_IFMT) {
- case 0:
- mode |= S_IFREG;
- /* for mode = 0 case */
- /* fall through */
- case S_IFREG:
- case S_IFCHR:
- case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
- err = ll_new_node(dir, dchild, NULL, mode,
- old_encode_dev(rdev),
- LUSTRE_OPC_MKNOD);
- break;
- case S_IFDIR:
- err = -EPERM;
- break;
- default:
- err = -EINVAL;
- }
-
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, 1);
-
- return err;
-}
-
-/*
- * Plain create. Intent create is handled in atomic_open.
- */
-static int ll_create_nd(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool want_excl)
-{
- int rc;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:name=%pd, dir=" DFID "(%p), flags=%u, excl=%d\n",
- dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
-
- rc = ll_mknod(dir, dentry, mode, 0);
-
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1);
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, unhashed %d\n",
- dentry, d_unhashed(dentry));
-
- return rc;
-}
-
-static int ll_unlink(struct inode *dir, struct dentry *dchild)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dchild, dir->i_ino, dir->i_generation, dir);
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dchild->d_name.name,
- dchild->d_name.len,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
-
- op_data->op_fid2 = op_data->op_fid3;
- rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (rc)
- goto out;
-
- ll_update_times(request, dir);
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1);
-
- out:
- ptlrpc_req_finished(request);
- return rc;
-}
-
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir" DFID "(%p)\n",
- dentry, PFID(ll_inode2fid(dir)), dir);
-
- if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
- mode &= ~current_umask();
- mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
-
- err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1);
-
- return err;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dchild)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p)\n",
- dchild, PFID(ll_inode2fid(dir)), dir);
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dchild->d_name.name,
- dchild->d_name.len,
- S_IFDIR, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
-
- op_data->op_fid2 = op_data->op_fid3;
- rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (rc == 0) {
- ll_update_times(request, dir);
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1);
- }
-
- ptlrpc_req_finished(request);
- return rc;
-}
-
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
- const char *oldname)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),target=%.*s\n",
- dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
-
- err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777,
- 0, LUSTRE_OPC_SYMLINK);
-
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, 1);
-
- return err;
-}
-
-static int ll_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry)
-{
- struct inode *src = d_inode(old_dentry);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int err;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op: inode=" DFID "(%p), dir=" DFID "(%p), target=%pd\n",
- PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
- new_dentry);
-
- op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
- new_dentry->d_name.len,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- err = md_link(sbi->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (err)
- goto out;
-
- ll_update_times(request, dir);
- ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1);
-out:
- ptlrpc_req_finished(request);
- return err;
-}
-
-static int ll_rename(struct inode *src, struct dentry *src_dchild,
- struct inode *tgt, struct dentry *tgt_dchild,
- unsigned int flags)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(src);
- struct md_op_data *op_data;
- int err;
-
- if (flags)
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:oldname=%pd, src_dir=" DFID "(%p), newname=%pd, tgt_dir=" DFID "(%p)\n",
- src_dchild, PFID(ll_inode2fid(src)), src,
- tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
-
- op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (src_dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
- if (tgt_dchild->d_inode)
- op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
-
- err = md_rename(sbi->ll_md_exp, op_data,
- src_dchild->d_name.name,
- src_dchild->d_name.len,
- tgt_dchild->d_name.name,
- tgt_dchild->d_name.len, &request);
- ll_finish_md_op_data(op_data);
- if (!err) {
- ll_update_times(request, src);
- ll_update_times(request, tgt);
- ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
- }
-
- ptlrpc_req_finished(request);
- if (!err)
- d_move(src_dchild, tgt_dchild);
- return err;
-}
-
-const struct inode_operations ll_dir_inode_operations = {
- .mknod = ll_mknod,
- .atomic_open = ll_atomic_open,
- .lookup = ll_lookup_nd,
- .create = ll_create_nd,
- /* We need all these non-raw things for NFSD, to not patch it. */
- .unlink = ll_unlink,
- .mkdir = ll_mkdir,
- .rmdir = ll_rmdir,
- .symlink = ll_symlink,
- .link = ll_link,
- .rename = ll_rename,
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .get_acl = ll_get_acl,
-};
-
-const struct inode_operations ll_special_inode_operations = {
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .get_acl = ll_get_acl,
-};
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c
deleted file mode 100644
index cc9565f6bfe2..000000000000
--- a/drivers/staging/lustre/lustre/llite/range_lock.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Range lock is used to allow multiple threads writing a single shared
- * file given each thread is writing to a non-overlapping portion of the
- * file.
- *
- * Refer to the possible upstream kernel version of range lock by
- * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
- *
- * This file could later replaced by the upstream kernel version.
- */
-/*
- * Author: Prakash Surya <surya1@llnl.gov>
- * Author: Bobi Jam <bobijam.xu@intel.com>
- */
-#include "range_lock.h"
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/**
- * Initialize a range lock tree
- *
- * \param tree [in] an empty range lock tree
- *
- * Pre: Caller should have allocated the range lock tree.
- * Post: The range lock tree is ready to function.
- */
-void range_lock_tree_init(struct range_lock_tree *tree)
-{
- tree->rlt_root = NULL;
- tree->rlt_sequence = 0;
- spin_lock_init(&tree->rlt_lock);
-}
-
-/**
- * Initialize a range lock node
- *
- * \param lock [in] an empty range lock node
- * \param start [in] start of the covering region
- * \param end [in] end of the covering region
- *
- * Pre: Caller should have allocated the range lock node.
- * Post: The range lock node is meant to cover [start, end] region
- */
-int range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
-{
- int rc;
-
- memset(&lock->rl_node, 0, sizeof(lock->rl_node));
- if (end != LUSTRE_EOF)
- end >>= PAGE_SHIFT;
- rc = interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
- if (rc)
- return rc;
-
- INIT_LIST_HEAD(&lock->rl_next_lock);
- lock->rl_task = NULL;
- lock->rl_lock_count = 0;
- lock->rl_blocking_ranges = 0;
- lock->rl_sequence = 0;
- return rc;
-}
-
-static inline struct range_lock *next_lock(struct range_lock *lock)
-{
- return list_entry(lock->rl_next_lock.next, typeof(*lock), rl_next_lock);
-}
-
-/**
- * Helper function of range_unlock()
- *
- * \param node [in] a range lock found overlapped during interval node
- * search
- * \param arg [in] the range lock to be tested
- *
- * \retval INTERVAL_ITER_CONT indicate to continue the search for next
- * overlapping range node
- * \retval INTERVAL_ITER_STOP indicate to stop the search
- */
-static enum interval_iter range_unlock_cb(struct interval_node *node, void *arg)
-{
- struct range_lock *lock = arg;
- struct range_lock *overlap = node2rangelock(node);
- struct range_lock *iter;
-
- list_for_each_entry(iter, &overlap->rl_next_lock, rl_next_lock) {
- if (iter->rl_sequence > lock->rl_sequence) {
- --iter->rl_blocking_ranges;
- LASSERT(iter->rl_blocking_ranges > 0);
- }
- }
- if (overlap->rl_sequence > lock->rl_sequence) {
- --overlap->rl_blocking_ranges;
- if (overlap->rl_blocking_ranges == 0)
- wake_up_process(overlap->rl_task);
- }
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Unlock a range lock, wake up locks blocked by this lock.
- *
- * \param tree [in] range lock tree
- * \param lock [in] range lock to be deleted
- *
- * If this lock has been granted, relase it; if not, just delete it from
- * the tree or the same region lock list. Wake up those locks only blocked
- * by this lock through range_unlock_cb().
- */
-void range_unlock(struct range_lock_tree *tree, struct range_lock *lock)
-{
- spin_lock(&tree->rlt_lock);
- if (!list_empty(&lock->rl_next_lock)) {
- struct range_lock *next;
-
- if (interval_is_intree(&lock->rl_node)) { /* first lock */
- /* Insert the next same range lock into the tree */
- next = next_lock(lock);
- next->rl_lock_count = lock->rl_lock_count - 1;
- interval_erase(&lock->rl_node, &tree->rlt_root);
- interval_insert(&next->rl_node, &tree->rlt_root);
- } else {
- /* find the first lock in tree */
- list_for_each_entry(next, &lock->rl_next_lock,
- rl_next_lock) {
- if (!interval_is_intree(&next->rl_node))
- continue;
-
- LASSERT(next->rl_lock_count > 0);
- next->rl_lock_count--;
- break;
- }
- }
- list_del_init(&lock->rl_next_lock);
- } else {
- LASSERT(interval_is_intree(&lock->rl_node));
- interval_erase(&lock->rl_node, &tree->rlt_root);
- }
-
- interval_search(tree->rlt_root, &lock->rl_node.in_extent,
- range_unlock_cb, lock);
- spin_unlock(&tree->rlt_lock);
-}
-
-/**
- * Helper function of range_lock()
- *
- * \param node [in] a range lock found overlapped during interval node
- * search
- * \param arg [in] the range lock to be tested
- *
- * \retval INTERVAL_ITER_CONT indicate to continue the search for next
- * overlapping range node
- * \retval INTERVAL_ITER_STOP indicate to stop the search
- */
-static enum interval_iter range_lock_cb(struct interval_node *node, void *arg)
-{
- struct range_lock *lock = arg;
- struct range_lock *overlap = node2rangelock(node);
-
- lock->rl_blocking_ranges += overlap->rl_lock_count + 1;
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Lock a region
- *
- * \param tree [in] range lock tree
- * \param lock [in] range lock node containing the region span
- *
- * \retval 0 get the range lock
- * \retval <0 error code while not getting the range lock
- *
- * If there exists overlapping range lock, the new lock will wait and
- * retry, if later it find that it is not the chosen one to wake up,
- * it wait again.
- */
-int range_lock(struct range_lock_tree *tree, struct range_lock *lock)
-{
- struct interval_node *node;
- int rc = 0;
-
- spin_lock(&tree->rlt_lock);
- /*
- * We need to check for all conflicting intervals
- * already in the tree.
- */
- interval_search(tree->rlt_root, &lock->rl_node.in_extent,
- range_lock_cb, lock);
- /*
- * Insert to the tree if I am unique, otherwise I've been linked to
- * the rl_next_lock of another lock which has the same range as mine
- * in range_lock_cb().
- */
- node = interval_insert(&lock->rl_node, &tree->rlt_root);
- if (node) {
- struct range_lock *tmp = node2rangelock(node);
-
- list_add_tail(&lock->rl_next_lock, &tmp->rl_next_lock);
- tmp->rl_lock_count++;
- }
- lock->rl_sequence = ++tree->rlt_sequence;
-
- while (lock->rl_blocking_ranges > 0) {
- lock->rl_task = current;
- __set_current_state(TASK_INTERRUPTIBLE);
- spin_unlock(&tree->rlt_lock);
- schedule();
-
- if (signal_pending(current)) {
- range_unlock(tree, lock);
- rc = -EINTR;
- goto out;
- }
- spin_lock(&tree->rlt_lock);
- }
- spin_unlock(&tree->rlt_lock);
-out:
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h
deleted file mode 100644
index 38b2be4e378f..000000000000
--- a/drivers/staging/lustre/lustre/llite/range_lock.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Range lock is used to allow multiple threads writing a single shared
- * file given each thread is writing to a non-overlapping portion of the
- * file.
- *
- * Refer to the possible upstream kernel version of range lock by
- * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
- *
- * This file could later replaced by the upstream kernel version.
- */
-/*
- * Author: Prakash Surya <surya1@llnl.gov>
- * Author: Bobi Jam <bobijam.xu@intel.com>
- */
-#ifndef _RANGE_LOCK_H
-#define _RANGE_LOCK_H
-
-#include <linux/libcfs/libcfs.h>
-#include <interval_tree.h>
-
-struct range_lock {
- struct interval_node rl_node;
- /**
- * Process to enqueue this lock.
- */
- struct task_struct *rl_task;
- /**
- * List of locks with the same range.
- */
- struct list_head rl_next_lock;
- /**
- * Number of locks in the list rl_next_lock
- */
- unsigned int rl_lock_count;
- /**
- * Number of ranges which are blocking acquisition of the lock
- */
- unsigned int rl_blocking_ranges;
- /**
- * Sequence number of range lock. This number is used to get to know
- * the order the locks are queued; this is required for range_cancel().
- */
- __u64 rl_sequence;
-};
-
-static inline struct range_lock *node2rangelock(const struct interval_node *n)
-{
- return container_of(n, struct range_lock, rl_node);
-}
-
-struct range_lock_tree {
- struct interval_node *rlt_root;
- spinlock_t rlt_lock; /* protect range lock tree */
- __u64 rlt_sequence;
-};
-
-void range_lock_tree_init(struct range_lock_tree *tree);
-int range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
-int range_lock(struct range_lock_tree *tree, struct range_lock *lock);
-void range_unlock(struct range_lock_tree *tree, struct range_lock *lock);
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
deleted file mode 100644
index 3e008ce7275d..000000000000
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ /dev/null
@@ -1,1214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/rw.c
- *
- * Lustre Lite I/O page cache routines shared by different kernel revs
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/writeback.h>
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-/* current_is_kswapd() */
-#include <linux/swap.h>
-#include <linux/bvec.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_cksum.h>
-#include "llite_internal.h"
-
-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
-
-/**
- * Get readahead pages from the filesystem readahead pool of the client for a
- * thread.
- *
- * /param sbi superblock for filesystem readahead state ll_ra_info
- * /param ria per-thread readahead state
- * /param pages number of pages requested for readahead for the thread.
- *
- * WARNING: This algorithm is used to reduce contention on sbi->ll_lock.
- * It should work well if the ra_max_pages is much greater than the single
- * file's read-ahead window, and not too many threads contending for
- * these readahead pages.
- *
- * TODO: There may be a 'global sync problem' if many threads are trying
- * to get an ra budget that is larger than the remaining readahead pages
- * and reach here at exactly the same time. They will compute /a ret to
- * consume the remaining pages, but will fail at atomic_add_return() and
- * get a zero ra window, although there is still ra space remaining. - Jay
- */
-static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
- struct ra_io_arg *ria,
- unsigned long pages, unsigned long min)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- long ret;
-
- /* If read-ahead pages left are less than 1M, do not do read-ahead,
- * otherwise it will form small read RPC(< 1M), which hurt server
- * performance a lot.
- */
- ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages);
- if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages)) {
- ret = 0;
- goto out;
- }
-
- if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
- atomic_sub(ret, &ra->ra_cur_pages);
- ret = 0;
- }
-
-out:
- if (ret < min) {
- /* override ra limit for maximum performance */
- atomic_add(min - ret, &ra->ra_cur_pages);
- ret = min;
- }
- return ret;
-}
-
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
-
- atomic_sub(len, &ra->ra_cur_pages);
-}
-
-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
-{
- LASSERTF(which < _NR_RA_STAT, "which: %u\n", which);
- lprocfs_counter_incr(sbi->ll_ra_stats, which);
-}
-
-void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- ll_ra_stats_inc_sbi(sbi, which);
-}
-
-#define RAS_CDEBUG(ras) \
- CDEBUG(D_READA, \
- "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \
- "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \
- ras->ras_last_readpage, ras->ras_consecutive_requests, \
- ras->ras_consecutive_pages, ras->ras_window_start, \
- ras->ras_window_len, ras->ras_next_readahead, \
- ras->ras_rpc_size, \
- ras->ras_requests, ras->ras_request_index, \
- ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
- ras->ras_stride_pages, ras->ras_stride_length)
-
-static int index_in_window(unsigned long index, unsigned long point,
- unsigned long before, unsigned long after)
-{
- unsigned long start = point - before, end = point + after;
-
- if (start > point)
- start = 0;
- if (end < point)
- end = ~0;
-
- return start <= index && index <= end;
-}
-
-void ll_ras_enter(struct file *f)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
- struct ll_readahead_state *ras = &fd->fd_ras;
-
- spin_lock(&ras->ras_lock);
- ras->ras_requests++;
- ras->ras_request_index = 0;
- ras->ras_consecutive_requests++;
- spin_unlock(&ras->ras_lock);
-}
-
-/**
- * Initiates read-ahead of a page with given index.
- *
- * \retval +ve: page was already uptodate so it will be skipped
- * from being added;
- * \retval -ve: page wasn't added to \a queue for error;
- * \retval 0: page was added into \a queue for read ahead.
- */
-static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, pgoff_t index)
-{
- enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
- struct cl_object *clob = io->ci_obj;
- struct inode *inode = vvp_object_inode(clob);
- const char *msg = NULL;
- struct cl_page *page;
- struct vvp_page *vpg;
- struct page *vmpage;
- int rc = 0;
-
- vmpage = grab_cache_page_nowait(inode->i_mapping, index);
- if (!vmpage) {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "g_c_p_n failed";
- rc = -EBUSY;
- goto out;
- }
-
- /* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping != inode->i_mapping) {
- which = RA_STAT_WRONG_GRAB_PAGE;
- msg = "g_c_p_n returned invalid page";
- rc = -EBUSY;
- goto out;
- }
-
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "cl_page_find failed";
- rc = PTR_ERR(page);
- goto out;
- }
-
- lu_ref_add(&page->cp_reference, "ra", current);
- cl_page_assume(env, io, page);
- vpg = cl2vvp_page(cl_object_page_slice(clob, page));
- if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
- vpg->vpg_defer_uptodate = 1;
- vpg->vpg_ra_used = 0;
- cl_page_list_add(queue, page);
- } else {
- /* skip completed pages */
- cl_page_unassume(env, io, page);
- /* This page is already uptodate, returning a positive number
- * to tell the callers about this
- */
- rc = 1;
- }
-
- lu_ref_del(&page->cp_reference, "ra", current);
- cl_page_put(env, page);
-out:
- if (vmpage) {
- if (rc)
- unlock_page(vmpage);
- put_page(vmpage);
- }
- if (msg) {
- ll_ra_stats_inc(inode, which);
- CDEBUG(D_READA, "%s\n", msg);
- }
- return rc;
-}
-
-#define RIA_DEBUG(ria) \
- CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
- ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
- ria->ria_pages)
-
-static inline int stride_io_mode(struct ll_readahead_state *ras)
-{
- return ras->ras_consecutive_stride_requests > 1;
-}
-
-/* The function calculates how much pages will be read in
- * [off, off + length], in such stride IO area,
- * stride_offset = st_off, stride_length = st_len,
- * stride_pages = st_pgs
- *
- * |------------------|*****|------------------|*****|------------|*****|....
- * st_off
- * |--- st_pgs ---|
- * |----- st_len -----|
- *
- * How many pages it should read in such pattern
- * |-------------------------------------------------------------|
- * off
- * |<------ length ------->|
- *
- * = |<----->| + |-------------------------------------| + |---|
- * start_left st_pgs * i end_left
- */
-static unsigned long
-stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
- unsigned long off, unsigned long length)
-{
- __u64 start = off > st_off ? off - st_off : 0;
- __u64 end = off + length > st_off ? off + length - st_off : 0;
- unsigned long start_left = 0;
- unsigned long end_left = 0;
- unsigned long pg_count;
-
- if (st_len == 0 || length == 0 || end == 0)
- return length;
-
- start_left = do_div(start, st_len);
- if (start_left < st_pgs)
- start_left = st_pgs - start_left;
- else
- start_left = 0;
-
- end_left = do_div(end, st_len);
- if (end_left > st_pgs)
- end_left = st_pgs;
-
- CDEBUG(D_READA, "start %llu, end %llu start_left %lu end_left %lu\n",
- start, end, start_left, end_left);
-
- if (start == end)
- pg_count = end_left - (st_pgs - start_left);
- else
- pg_count = start_left + st_pgs * (end - start - 1) + end_left;
-
- CDEBUG(D_READA,
- "st_off %lu, st_len %lu st_pgs %lu off %lu length %lu pgcount %lu\n",
- st_off, st_len, st_pgs, off, length, pg_count);
-
- return pg_count;
-}
-
-static int ria_page_count(struct ra_io_arg *ria)
-{
- __u64 length = ria->ria_end >= ria->ria_start ?
- ria->ria_end - ria->ria_start + 1 : 0;
-
- return stride_pg_count(ria->ria_stoff, ria->ria_length,
- ria->ria_pages, ria->ria_start,
- length);
-}
-
-static unsigned long ras_align(struct ll_readahead_state *ras,
- unsigned long index,
- unsigned long *remainder)
-{
- unsigned long rem = index % ras->ras_rpc_size;
-
- if (remainder)
- *remainder = rem;
- return index - rem;
-}
-
-/*Check whether the index is in the defined ra-window */
-static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
-{
- /* If ria_length == ria_pages, it means non-stride I/O mode,
- * idx should always inside read-ahead window in this case
- * For stride I/O mode, just check whether the idx is inside
- * the ria_pages.
- */
- return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
- (idx >= ria->ria_stoff && (idx - ria->ria_stoff) %
- ria->ria_length < ria->ria_pages);
-}
-
-static unsigned long
-ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct ll_readahead_state *ras,
- struct ra_io_arg *ria)
-{
- struct cl_read_ahead ra = { 0 };
- unsigned long ra_end = 0;
- bool stride_ria;
- pgoff_t page_idx;
- int rc;
-
- LASSERT(ria);
- RIA_DEBUG(ria);
-
- stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
- for (page_idx = ria->ria_start;
- page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) {
- if (ras_inside_ra_window(page_idx, ria)) {
- if (!ra.cra_end || ra.cra_end < page_idx) {
- unsigned long end;
-
- cl_read_ahead_release(env, &ra);
-
- rc = cl_io_read_ahead(env, io, page_idx, &ra);
- if (rc < 0)
- break;
-
- CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n",
- page_idx, ra.cra_end, ra.cra_rpc_size);
- LASSERTF(ra.cra_end >= page_idx,
- "object: %p, indcies %lu / %lu\n",
- io->ci_obj, ra.cra_end, page_idx);
- /*
- * update read ahead RPC size.
- * NB: it's racy but doesn't matter
- */
- if (ras->ras_rpc_size > ra.cra_rpc_size &&
- ra.cra_rpc_size > 0)
- ras->ras_rpc_size = ra.cra_rpc_size;
- /* trim it to align with optimal RPC size */
- end = ras_align(ras, ria->ria_end + 1, NULL);
- if (end > 0 && !ria->ria_eof)
- ria->ria_end = end - 1;
- if (ria->ria_end < ria->ria_end_min)
- ria->ria_end = ria->ria_end_min;
- if (ria->ria_end > ra.cra_end)
- ria->ria_end = ra.cra_end;
- }
-
- /* If the page is inside the read-ahead window */
- rc = ll_read_ahead_page(env, io, queue, page_idx);
- if (rc < 0)
- break;
-
- ra_end = page_idx;
- if (!rc)
- ria->ria_reserved--;
- } else if (stride_ria) {
- /* If it is not in the read-ahead window, and it is
- * read-ahead mode, then check whether it should skip
- * the stride gap
- */
- pgoff_t offset;
- /* FIXME: This assertion only is valid when it is for
- * forward read-ahead, it will be fixed when backward
- * read-ahead is implemented
- */
- LASSERTF(page_idx >= ria->ria_stoff,
- "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
- page_idx,
- ria->ria_start, ria->ria_end, ria->ria_stoff,
- ria->ria_length, ria->ria_pages);
- offset = page_idx - ria->ria_stoff;
- offset = offset % (ria->ria_length);
- if (offset > ria->ria_pages) {
- page_idx += ria->ria_length - offset;
- CDEBUG(D_READA, "i %lu skip %lu\n", page_idx,
- ria->ria_length - offset);
- continue;
- }
- }
- }
- cl_read_ahead_release(env, &ra);
-
- return ra_end;
-}
-
-static int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue,
- struct ll_readahead_state *ras, bool hit)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct ll_thread_info *lti = ll_env_info(env);
- struct cl_attr *attr = vvp_env_thread_attr(env);
- unsigned long len, mlen = 0;
- pgoff_t ra_end, start = 0, end = 0;
- struct inode *inode;
- struct ra_io_arg *ria = &lti->lti_ria;
- struct cl_object *clob;
- int ret = 0;
- __u64 kms;
-
- clob = io->ci_obj;
- inode = vvp_object_inode(clob);
-
- memset(ria, 0, sizeof(*ria));
-
- cl_object_attr_lock(clob);
- ret = cl_object_attr_get(env, clob, attr);
- cl_object_attr_unlock(clob);
-
- if (ret != 0)
- return ret;
- kms = attr->cat_kms;
- if (kms == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
- return 0;
- }
-
- spin_lock(&ras->ras_lock);
-
- /**
- * Note: other thread might rollback the ras_next_readahead,
- * if it can not get the full size of prepared pages, see the
- * end of this function. For stride read ahead, it needs to
- * make sure the offset is no less than ras_stride_offset,
- * so that stride read ahead can work correctly.
- */
- if (stride_io_mode(ras))
- start = max(ras->ras_next_readahead, ras->ras_stride_offset);
- else
- start = ras->ras_next_readahead;
-
- if (ras->ras_window_len > 0)
- end = ras->ras_window_start + ras->ras_window_len - 1;
-
- /* Enlarge the RA window to encompass the full read */
- if (vio->vui_ra_valid &&
- end < vio->vui_ra_start + vio->vui_ra_count - 1)
- end = vio->vui_ra_start + vio->vui_ra_count - 1;
-
- if (end) {
- unsigned long end_index;
-
- /* Truncate RA window to end of file */
- end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
- if (end_index <= end) {
- end = end_index;
- ria->ria_eof = true;
- }
-
- ras->ras_next_readahead = max(end, end + 1);
- RAS_CDEBUG(ras);
- }
- ria->ria_start = start;
- ria->ria_end = end;
- /* If stride I/O mode is detected, get stride window*/
- if (stride_io_mode(ras)) {
- ria->ria_stoff = ras->ras_stride_offset;
- ria->ria_length = ras->ras_stride_length;
- ria->ria_pages = ras->ras_stride_pages;
- }
- spin_unlock(&ras->ras_lock);
-
- if (end == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
- return 0;
- }
- len = ria_page_count(ria);
- if (len == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
- return 0;
- }
-
- CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
- PFID(lu_object_fid(&clob->co_lu)),
- ria->ria_start, ria->ria_end,
- vio->vui_ra_valid ? vio->vui_ra_start : 0,
- vio->vui_ra_valid ? vio->vui_ra_count : 0,
- hit);
-
- /* at least to extend the readahead window to cover current read */
- if (!hit && vio->vui_ra_valid &&
- vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
- unsigned long remainder;
-
- /* to the end of current read window. */
- mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
- /* trim to RPC boundary */
- ras_align(ras, ria->ria_start, &remainder);
- mlen = min(mlen, ras->ras_rpc_size - remainder);
- ria->ria_end_min = ria->ria_start + mlen;
- }
-
- ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
- if (ria->ria_reserved < len)
- ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
-
- CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
- ria->ria_reserved, len, mlen,
- atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
- ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
-
- ra_end = ll_read_ahead_pages(env, io, queue, ras, ria);
-
- if (ria->ria_reserved)
- ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
-
- if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
- ll_ra_stats_inc(inode, RA_STAT_EOF);
-
- /* if we didn't get to the end of the region we reserved from
- * the ras we need to go back and update the ras so that the
- * next read-ahead tries from where we left off. we only do so
- * if the region we failed to issue read-ahead on is still ahead
- * of the app and behind the next index to start read-ahead from
- */
- CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
- ra_end, end, ria->ria_end, ret);
-
- if (ra_end > 0 && ra_end != end) {
- ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
- spin_lock(&ras->ras_lock);
- if (ra_end <= ras->ras_next_readahead &&
- index_in_window(ra_end, ras->ras_window_start, 0,
- ras->ras_window_len)) {
- ras->ras_next_readahead = ra_end + 1;
- RAS_CDEBUG(ras);
- }
- spin_unlock(&ras->ras_lock);
- }
-
- return ret;
-}
-
-static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
- unsigned long index)
-{
- ras->ras_window_start = ras_align(ras, index, NULL);
-}
-
-/* called with the ras_lock held or from places where it doesn't matter */
-static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
- unsigned long index)
-{
- ras->ras_last_readpage = index;
- ras->ras_consecutive_requests = 0;
- ras->ras_consecutive_pages = 0;
- ras->ras_window_len = 0;
- ras_set_start(inode, ras, index);
- ras->ras_next_readahead = max(ras->ras_window_start, index + 1);
-
- RAS_CDEBUG(ras);
-}
-
-/* called with the ras_lock held or from places where it doesn't matter */
-static void ras_stride_reset(struct ll_readahead_state *ras)
-{
- ras->ras_consecutive_stride_requests = 0;
- ras->ras_stride_length = 0;
- ras->ras_stride_pages = 0;
- RAS_CDEBUG(ras);
-}
-
-void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
-{
- spin_lock_init(&ras->ras_lock);
- ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES;
- ras_reset(inode, ras, 0);
- ras->ras_requests = 0;
-}
-
-/*
- * Check whether the read request is in the stride window.
- * If it is in the stride window, return 1, otherwise return 0.
- */
-static int index_in_stride_window(struct ll_readahead_state *ras,
- unsigned long index)
-{
- unsigned long stride_gap;
-
- if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0 ||
- ras->ras_stride_pages == ras->ras_stride_length)
- return 0;
-
- stride_gap = index - ras->ras_last_readpage - 1;
-
- /* If it is contiguous read */
- if (stride_gap == 0)
- return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-
- /* Otherwise check the stride by itself */
- return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
- ras->ras_consecutive_pages == ras->ras_stride_pages;
-}
-
-static void ras_update_stride_detector(struct ll_readahead_state *ras,
- unsigned long index)
-{
- unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-
- if ((stride_gap != 0 || ras->ras_consecutive_stride_requests == 0) &&
- !stride_io_mode(ras)) {
- ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = ras->ras_consecutive_pages +
- stride_gap;
- }
- LASSERT(ras->ras_request_index == 0);
- LASSERT(ras->ras_consecutive_stride_requests == 0);
-
- if (index <= ras->ras_last_readpage) {
- /*Reset stride window for forward read*/
- ras_stride_reset(ras);
- return;
- }
-
- ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
-
- RAS_CDEBUG(ras);
-}
-
-/* Stride Read-ahead window will be increased inc_len according to
- * stride I/O pattern
- */
-static void ras_stride_increase_window(struct ll_readahead_state *ras,
- struct ll_ra_info *ra,
- unsigned long inc_len)
-{
- unsigned long left, step, window_len;
- unsigned long stride_len;
-
- LASSERT(ras->ras_stride_length > 0);
- LASSERTF(ras->ras_window_start + ras->ras_window_len >=
- ras->ras_stride_offset,
- "window_start %lu, window_len %lu stride_offset %lu\n",
- ras->ras_window_start,
- ras->ras_window_len, ras->ras_stride_offset);
-
- stride_len = ras->ras_window_start + ras->ras_window_len -
- ras->ras_stride_offset;
-
- left = stride_len % ras->ras_stride_length;
- window_len = ras->ras_window_len - left;
-
- if (left < ras->ras_stride_pages)
- left += inc_len;
- else
- left = ras->ras_stride_pages + inc_len;
-
- LASSERT(ras->ras_stride_pages != 0);
-
- step = left / ras->ras_stride_pages;
- left %= ras->ras_stride_pages;
-
- window_len += step * ras->ras_stride_length + left;
-
- if (stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
- ras->ras_stride_pages, ras->ras_stride_offset,
- window_len) <= ra->ra_max_pages_per_file)
- ras->ras_window_len = window_len;
-
- RAS_CDEBUG(ras);
-}
-
-static void ras_increase_window(struct inode *inode,
- struct ll_readahead_state *ras,
- struct ll_ra_info *ra)
-{
- /* The stretch of ra-window should be aligned with max rpc_size
- * but current clio architecture does not support retrieve such
- * information from lower layer. FIXME later
- */
- if (stride_io_mode(ras)) {
- ras_stride_increase_window(ras, ra, ras->ras_rpc_size);
- } else {
- unsigned long wlen;
-
- wlen = min(ras->ras_window_len + ras->ras_rpc_size,
- ra->ra_max_pages_per_file);
- ras->ras_window_len = ras_align(ras, wlen, NULL);
- }
-}
-
-static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
- struct ll_readahead_state *ras, unsigned long index,
- enum ras_update_flags flags)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- int zero = 0, stride_detect = 0, ra_miss = 0;
- bool hit = flags & LL_RAS_HIT;
-
- spin_lock(&ras->ras_lock);
-
- if (!hit)
- CDEBUG(D_READA, DFID " pages at %lu miss.\n",
- PFID(ll_inode2fid(inode)), index);
-
- ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
-
- /* reset the read-ahead window in two cases. First when the app seeks
- * or reads to some other part of the file. Secondly if we get a
- * read-ahead miss that we think we've previously issued. This can
- * be a symptom of there being so many read-ahead pages that the VM is
- * reclaiming it before we get to it.
- */
- if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
- zero = 1;
- ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
- } else if (!hit && ras->ras_window_len &&
- index < ras->ras_next_readahead &&
- index_in_window(index, ras->ras_window_start, 0,
- ras->ras_window_len)) {
- ra_miss = 1;
- ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
- }
-
- /* On the second access to a file smaller than the tunable
- * ra_max_read_ahead_whole_pages trigger RA on all pages in the
- * file up to ra_max_pages_per_file. This is simply a best effort
- * and only occurs once per open file. Normal RA behavior is reverted
- * to for subsequent IO. The mmap case does not increment
- * ras_requests and thus can never trigger this behavior.
- */
- if (ras->ras_requests >= 2 && !ras->ras_request_index) {
- __u64 kms_pages;
-
- kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
- PAGE_SHIFT;
-
- CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
- ra->ra_max_read_ahead_whole_pages,
- ra->ra_max_pages_per_file);
-
- if (kms_pages &&
- kms_pages <= ra->ra_max_read_ahead_whole_pages) {
- ras->ras_window_start = 0;
- ras->ras_next_readahead = index + 1;
- ras->ras_window_len = min(ra->ra_max_pages_per_file,
- ra->ra_max_read_ahead_whole_pages);
- goto out_unlock;
- }
- }
- if (zero) {
- /* check whether it is in stride I/O mode*/
- if (!index_in_stride_window(ras, index)) {
- if (ras->ras_consecutive_stride_requests == 0 &&
- ras->ras_request_index == 0) {
- ras_update_stride_detector(ras, index);
- ras->ras_consecutive_stride_requests++;
- } else {
- ras_stride_reset(ras);
- }
- ras_reset(inode, ras, index);
- ras->ras_consecutive_pages++;
- goto out_unlock;
- } else {
- ras->ras_consecutive_pages = 0;
- ras->ras_consecutive_requests = 0;
- if (++ras->ras_consecutive_stride_requests > 1)
- stride_detect = 1;
- RAS_CDEBUG(ras);
- }
- } else {
- if (ra_miss) {
- if (index_in_stride_window(ras, index) &&
- stride_io_mode(ras)) {
- if (index != ras->ras_last_readpage + 1)
- ras->ras_consecutive_pages = 0;
- ras_reset(inode, ras, index);
-
- /* If stride-RA hit cache miss, the stride
- * detector will not be reset to avoid the
- * overhead of redetecting read-ahead mode,
- * but on the condition that the stride window
- * is still intersect with normal sequential
- * read-ahead window.
- */
- if (ras->ras_window_start <
- ras->ras_stride_offset)
- ras_stride_reset(ras);
- RAS_CDEBUG(ras);
- } else {
- /* Reset both stride window and normal RA
- * window
- */
- ras_reset(inode, ras, index);
- ras->ras_consecutive_pages++;
- ras_stride_reset(ras);
- goto out_unlock;
- }
- } else if (stride_io_mode(ras)) {
- /* If this is contiguous read but in stride I/O mode
- * currently, check whether stride step still is valid,
- * if invalid, it will reset the stride ra window
- */
- if (!index_in_stride_window(ras, index)) {
- /* Shrink stride read-ahead window to be zero */
- ras_stride_reset(ras);
- ras->ras_window_len = 0;
- ras->ras_next_readahead = index;
- }
- }
- }
- ras->ras_consecutive_pages++;
- ras->ras_last_readpage = index;
- ras_set_start(inode, ras, index);
-
- if (stride_io_mode(ras)) {
- /* Since stride readahead is sensitive to the offset
- * of read-ahead, so we use original offset here,
- * instead of ras_window_start, which is RPC aligned
- */
- ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- ras->ras_window_start = max(ras->ras_stride_offset,
- ras->ras_window_start);
- } else {
- if (ras->ras_next_readahead < ras->ras_window_start)
- ras->ras_next_readahead = ras->ras_window_start;
- if (!hit)
- ras->ras_next_readahead = index + 1;
- }
- RAS_CDEBUG(ras);
-
- /* Trigger RA in the mmap case where ras_consecutive_requests
- * is not incremented and thus can't be used to trigger RA
- */
- if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) {
- ras_increase_window(inode, ras, ra);
- /*
- * reset consecutive pages so that the readahead window can
- * grow gradually.
- */
- ras->ras_consecutive_pages = 0;
- goto out_unlock;
- }
-
- /* Initially reset the stride window offset to next_readahead*/
- if (ras->ras_consecutive_stride_requests == 2 && stride_detect) {
- /**
- * Once stride IO mode is detected, next_readahead should be
- * reset to make sure next_readahead > stride offset
- */
- ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- ras->ras_stride_offset = index;
- ras->ras_window_start = max(index, ras->ras_window_start);
- }
-
- /* The initial ras_window_len is set to the request size. To avoid
- * uselessly reading and discarding pages for random IO the window is
- * only increased once per consecutive request received.
- */
- if ((ras->ras_consecutive_requests > 1 || stride_detect) &&
- !ras->ras_request_index)
- ras_increase_window(inode, ras, ra);
-out_unlock:
- RAS_CDEBUG(ras);
- ras->ras_request_index++;
- spin_unlock(&ras->ras_lock);
-}
-
-int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
-{
- struct inode *inode = vmpage->mapping->host;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- struct cl_object *clob;
- bool redirtied = false;
- bool unlocked = false;
- int result;
- u16 refcheck;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
-
- LASSERT(ll_i2dtexp(inode));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- result = PTR_ERR(env);
- goto out;
- }
-
- clob = ll_i2info(inode)->lli_clob;
- LASSERT(clob);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = clob;
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, clob);
- if (result == 0) {
- page = cl_page_find(env, clob, vmpage->index,
- vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- lu_ref_add(&page->cp_reference, "writepage",
- current);
- cl_page_assume(env, io, page);
- result = cl_page_flush(env, io, page);
- if (result != 0) {
- /*
- * Re-dirty page on error so it retries write,
- * but not in case when IO has actually
- * occurred and completed with an error.
- */
- if (!PageError(vmpage)) {
- redirty_page_for_writepage(wbc, vmpage);
- result = 0;
- redirtied = true;
- }
- }
- cl_page_disown(env, io, page);
- unlocked = true;
- lu_ref_del(&page->cp_reference,
- "writepage", current);
- cl_page_put(env, page);
- } else {
- result = PTR_ERR(page);
- }
- }
- cl_io_fini(env, io);
-
- if (redirtied && wbc->sync_mode == WB_SYNC_ALL) {
- loff_t offset = cl_offset(clob, vmpage->index);
-
- /* Flush page failed because the extent is being written out.
- * Wait for the write of extent to be finished to avoid
- * breaking kernel which assumes ->writepage should mark
- * PageWriteback or clean the page.
- */
- result = cl_sync_file_range(inode, offset,
- offset + PAGE_SIZE - 1,
- CL_FSYNC_LOCAL, 1);
- if (result > 0) {
- /* actually we may have written more than one page.
- * decreasing this page because the caller will count
- * it.
- */
- wbc->nr_to_write -= result - 1;
- result = 0;
- }
- }
-
- cl_env_put(env, &refcheck);
- goto out;
-
-out:
- if (result < 0) {
- if (!lli->lli_async_rc)
- lli->lli_async_rc = result;
- SetPageError(vmpage);
- if (!unlocked)
- unlock_page(vmpage);
- }
- return result;
-}
-
-int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
- struct inode *inode = mapping->host;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- loff_t start;
- loff_t end;
- enum cl_fsync_mode mode;
- int range_whole = 0;
- int result;
- int ignore_layout = 0;
-
- if (wbc->range_cyclic) {
- start = mapping->writeback_index << PAGE_SHIFT;
- end = OBD_OBJECT_EOF;
- } else {
- start = wbc->range_start;
- end = wbc->range_end;
- if (end == LLONG_MAX) {
- end = OBD_OBJECT_EOF;
- range_whole = start == 0;
- }
- }
-
- mode = CL_FSYNC_NONE;
- if (wbc->sync_mode == WB_SYNC_ALL)
- mode = CL_FSYNC_LOCAL;
-
- if (sbi->ll_umounting)
- /* if the mountpoint is being umounted, all pages have to be
- * evicted to avoid hitting LBUG when truncate_inode_pages()
- * is called later on.
- */
- ignore_layout = 1;
-
- if (!ll_i2info(inode)->lli_clob)
- return 0;
-
- result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
- if (result > 0) {
- wbc->nr_to_write -= result;
- result = 0;
- }
-
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
- if (end == OBD_OBJECT_EOF)
- mapping->writeback_index = 0;
- else
- mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
- }
- return result;
-}
-
-struct ll_cl_context *ll_cl_find(struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc;
- struct ll_cl_context *found = NULL;
-
- read_lock(&fd->fd_lock);
- list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) {
- if (lcc->lcc_cookie == current) {
- found = lcc;
- break;
- }
- }
- read_unlock(&fd->fd_lock);
-
- return found;
-}
-
-void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
-
- memset(lcc, 0, sizeof(*lcc));
- INIT_LIST_HEAD(&lcc->lcc_list);
- lcc->lcc_cookie = current;
- lcc->lcc_env = env;
- lcc->lcc_io = io;
-
- write_lock(&fd->fd_lock);
- list_add(&lcc->lcc_list, &fd->fd_lccs);
- write_unlock(&fd->fd_lock);
-}
-
-void ll_cl_remove(struct file *file, const struct lu_env *env)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
-
- write_lock(&fd->fd_lock);
- list_del_init(&lcc->lcc_list);
- write_unlock(&fd->fd_lock);
-}
-
-static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct inode *inode = vvp_object_inode(page->cp_obj);
- struct ll_file_data *fd = vvp_env_io(env)->vui_fd;
- struct ll_readahead_state *ras = &fd->fd_ras;
- struct cl_2queue *queue = &io->ci_queue;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct vvp_page *vpg;
- bool uptodate;
- int rc = 0;
-
- vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
- uptodate = vpg->vpg_defer_uptodate;
-
- if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
- sbi->ll_ra_info.ra_max_pages > 0) {
- struct vvp_io *vio = vvp_env_io(env);
- enum ras_update_flags flags = 0;
-
- if (uptodate)
- flags |= LL_RAS_HIT;
- if (!vio->vui_ra_valid)
- flags |= LL_RAS_MMAP;
- ras_update(sbi, inode, ras, vvp_index(vpg), flags);
- }
-
- cl_2queue_init(queue);
- if (uptodate) {
- vpg->vpg_ra_used = 1;
- cl_page_export(env, page, 1);
- cl_page_disown(env, io, page);
- } else {
- cl_page_list_add(&queue->c2_qin, page);
- }
-
- if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
- sbi->ll_ra_info.ra_max_pages > 0) {
- int rc2;
-
- rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
- uptodate);
- CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
- PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
- }
-
- if (queue->c2_qin.pl_nr > 0)
- rc = cl_io_submit_rw(env, io, CRT_READ, queue);
-
- /*
- * Unlock unsent pages in case of error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return rc;
-}
-
-int ll_readpage(struct file *file, struct page *vmpage)
-{
- struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
- struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- int result;
-
- lcc = ll_cl_find(file);
- if (!lcc) {
- unlock_page(vmpage);
- return -EIO;
- }
-
- env = lcc->lcc_env;
- io = lcc->lcc_io;
- LASSERT(io->ci_state == CIS_IO_GOING);
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- LASSERT(page->cp_type == CPT_CACHEABLE);
- if (likely(!PageUptodate(vmpage))) {
- cl_page_assume(env, io, page);
- result = ll_io_read_page(env, io, page);
- } else {
- /* Page from a non-object file. */
- unlock_page(vmpage);
- result = 0;
- }
- cl_page_put(env, page);
- } else {
- unlock_page(vmpage);
- result = PTR_ERR(page);
- }
- return result;
-}
-
-int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, enum cl_req_type crt)
-{
- struct cl_2queue *queue;
- int result;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- queue = &io->ci_queue;
- cl_2queue_init_page(queue, page);
-
- result = cl_io_submit_sync(env, io, crt, queue, 0);
- LASSERT(cl_page_is_owned(page, io));
-
- if (crt == CRT_READ)
- /*
- * in CRT_WRITE case page is left locked even in case of
- * error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
deleted file mode 100644
index 722e5ea1af5f..000000000000
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ /dev/null
@@ -1,641 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lustre/llite/rw26.c
- *
- * Lustre Lite I/O page cache routines for the 2.5/2.6 kernel version
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
-
-#include <linux/migrate.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/mpage.h>
-#include <linux/writeback.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-/**
- * Implements Linux VM address_space::invalidatepage() method. This method is
- * called when the page is truncate from a file, either as a result of
- * explicit truncate, or when inode is removed from memory (as a result of
- * final iput(), umount, or memory pressure induced icache shrinking).
- *
- * [0, offset] bytes of the page remain valid (this is for a case of not-page
- * aligned truncate). Lustre leaves partially truncated page in the cache,
- * relying on struct inode::i_size to limit further accesses.
- */
-static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
- unsigned int length)
-{
- struct inode *inode;
- struct lu_env *env;
- struct cl_page *page;
- struct cl_object *obj;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
-
- /*
- * It is safe to not check anything in invalidatepage/releasepage
- * below because they are run with page locked and all our io is
- * happening with locked page too
- */
- if (offset == 0 && length == PAGE_SIZE) {
- /* See the comment in ll_releasepage() */
- env = cl_env_percpu_get();
- LASSERT(!IS_ERR(env));
- inode = vmpage->mapping->host;
- obj = ll_i2info(inode)->lli_clob;
- if (obj) {
- page = cl_vmpage_page(vmpage, obj);
- if (page) {
- cl_page_delete(env, page);
- cl_page_put(env, page);
- }
- } else {
- LASSERT(vmpage->private == 0);
- }
- cl_env_percpu_put(env);
- }
-}
-
-static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
-{
- struct lu_env *env;
- struct cl_object *obj;
- struct cl_page *page;
- struct address_space *mapping;
- int result = 0;
-
- LASSERT(PageLocked(vmpage));
- if (PageWriteback(vmpage) || PageDirty(vmpage))
- return 0;
-
- mapping = vmpage->mapping;
- if (!mapping)
- return 1;
-
- obj = ll_i2info(mapping->host)->lli_clob;
- if (!obj)
- return 1;
-
- /* 1 for caller, 1 for cl_page and 1 for page cache */
- if (page_count(vmpage) > 3)
- return 0;
-
- page = cl_vmpage_page(vmpage, obj);
- if (!page)
- return 1;
-
- env = cl_env_percpu_get();
- LASSERT(!IS_ERR(env));
-
- if (!cl_page_in_use(page)) {
- result = 1;
- cl_page_delete(env, page);
- }
-
- /* To use percpu env array, the call path can not be rescheduled;
- * otherwise percpu array will be messed if ll_releaspage() called
- * again on the same CPU.
- *
- * If this page holds the last refc of cl_object, the following
- * call path may cause reschedule:
- * cl_page_put -> cl_page_free -> cl_object_put ->
- * lu_object_put -> lu_object_free -> lov_delete_raid0.
- *
- * However, the kernel can't get rid of this inode until all pages have
- * been cleaned up. Now that we hold page lock here, it's pretty safe
- * that we won't get into object delete path.
- */
- LASSERT(cl_object_refc(obj) > 1);
- cl_page_put(env, page);
-
- cl_env_percpu_put(env);
- return result;
-}
-
-#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL)
-
-/* ll_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- */
-static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
-{
- int i;
-
- for (i = 0; i < npages; i++) {
- if (do_dirty)
- set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
- }
- kvfree(pages);
-}
-
-ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct ll_dio_pages *pv)
-{
- struct cl_page *clp;
- struct cl_2queue *queue;
- struct cl_object *obj = io->ci_obj;
- int i;
- ssize_t rc = 0;
- loff_t file_offset = pv->ldp_start_offset;
- size_t size = pv->ldp_size;
- int page_count = pv->ldp_nr;
- struct page **pages = pv->ldp_pages;
- size_t page_size = cl_page_size(obj);
- bool do_io;
- int io_pages = 0;
-
- queue = &io->ci_queue;
- cl_2queue_init(queue);
- for (i = 0; i < page_count; i++) {
- if (pv->ldp_offsets)
- file_offset = pv->ldp_offsets[i];
-
- LASSERT(!(file_offset & (page_size - 1)));
- clp = cl_page_find(env, obj, cl_index(obj, file_offset),
- pv->ldp_pages[i], CPT_TRANSIENT);
- if (IS_ERR(clp)) {
- rc = PTR_ERR(clp);
- break;
- }
-
- rc = cl_page_own(env, io, clp);
- if (rc) {
- LASSERT(clp->cp_state == CPS_FREEING);
- cl_page_put(env, clp);
- break;
- }
-
- do_io = true;
-
- /* check the page type: if the page is a host page, then do
- * write directly
- */
- if (clp->cp_type == CPT_CACHEABLE) {
- struct page *vmpage = cl_page_vmpage(clp);
- struct page *src_page;
- struct page *dst_page;
- void *src;
- void *dst;
-
- src_page = (rw == WRITE) ? pages[i] : vmpage;
- dst_page = (rw == WRITE) ? vmpage : pages[i];
-
- src = kmap_atomic(src_page);
- dst = kmap_atomic(dst_page);
- memcpy(dst, src, min(page_size, size));
- kunmap_atomic(dst);
- kunmap_atomic(src);
-
- /* make sure page will be added to the transfer by
- * cl_io_submit()->...->vvp_page_prep_write().
- */
- if (rw == WRITE)
- set_page_dirty(vmpage);
-
- if (rw == READ) {
- /* do not issue the page for read, since it
- * may reread a ra page which has NOT uptodate
- * bit set.
- */
- cl_page_disown(env, io, clp);
- do_io = false;
- }
- }
-
- if (likely(do_io)) {
- /*
- * Add a page to the incoming page list of 2-queue.
- */
- cl_page_list_add(&queue->c2_qin, clp);
-
- /*
- * Set page clip to tell transfer formation engine
- * that page has to be sent even if it is beyond KMS.
- */
- cl_page_clip(env, clp, 0, min(size, page_size));
-
- ++io_pages;
- }
-
- /* drop the reference count for cl_page_find */
- cl_page_put(env, clp);
- size -= page_size;
- file_offset += page_size;
- }
-
- if (rc == 0 && io_pages) {
- rc = cl_io_submit_sync(env, io,
- rw == READ ? CRT_READ : CRT_WRITE,
- queue, 0);
- }
- if (rc == 0)
- rc = pv->ldp_size;
-
- cl_2queue_discard(env, io, queue);
- cl_2queue_disown(env, io, queue);
- cl_2queue_fini(env, queue);
- return rc;
-}
-EXPORT_SYMBOL(ll_direct_rw_pages);
-
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct address_space *mapping,
- size_t size, loff_t file_offset,
- struct page **pages, int page_count)
-{
- struct ll_dio_pages pvec = {
- .ldp_pages = pages,
- .ldp_nr = page_count,
- .ldp_size = size,
- .ldp_offsets = NULL,
- .ldp_start_offset = file_offset
- };
-
- return ll_direct_rw_pages(env, io, rw, inode, &pvec);
-}
-
-/* This is the maximum size of a single O_DIRECT request, based on the
- * kmalloc limit. We need to fit all of the brw_page structs, each one
- * representing PAGE_SIZE worth of user data, into a single buffer, and
- * then truncate this to be a full-sized RPC. For 4kB PAGE_SIZE this is
- * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
- */
-#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
- PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- loff_t file_offset = iocb->ki_pos;
- ssize_t count = iov_iter_count(iter);
- ssize_t tot_bytes = 0, result = 0;
- long size = MAX_DIO_SIZE;
-
- /* Check EOF by ourselves */
- if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode))
- return 0;
-
- /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
- if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
- PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
- file_offset, file_offset, count >> PAGE_SHIFT,
- MAX_DIO_SIZE >> PAGE_SHIFT);
-
- /* Check that all user buffers are aligned as well */
- if (iov_iter_alignment(iter) & ~PAGE_MASK)
- return -EINVAL;
-
- lcc = ll_cl_find(file);
- if (!lcc)
- return -EIO;
-
- env = lcc->lcc_env;
- LASSERT(!IS_ERR(env));
- io = lcc->lcc_io;
- LASSERT(io);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- size_t offs;
-
- count = min_t(size_t, iov_iter_count(iter), size);
- if (iov_iter_rw(iter) == READ) {
- if (file_offset >= i_size_read(inode))
- break;
- if (file_offset + count > i_size_read(inode))
- count = i_size_read(inode) - file_offset;
- }
-
- result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
- if (likely(result > 0)) {
- int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
-
- result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter),
- inode, file->f_mapping,
- result, file_offset, pages,
- n);
- ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ);
- }
- if (unlikely(result <= 0)) {
- /* If we can't allocate a large enough buffer
- * for the request, shrink it to a smaller
- * PAGE_SIZE multiple and try again.
- * We should always be able to kmalloc for a
- * page worth of page pointers = 4MB on i386.
- */
- if (result == -ENOMEM &&
- size > (PAGE_SIZE / sizeof(*pages)) *
- PAGE_SIZE) {
- size = ((((size / 2) - 1) |
- ~PAGE_MASK) + 1) &
- PAGE_MASK;
- CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
- size);
- continue;
- }
-
- goto out;
- }
- iov_iter_advance(iter, result);
- tot_bytes += result;
- file_offset += result;
- }
-out:
- if (tot_bytes > 0) {
- struct vvp_io *vio = vvp_env_io(env);
-
- /* no commit async for direct IO */
- vio->u.write.vui_written += tot_bytes;
- }
-
- return tot_bytes ? tot_bytes : result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg)
-{
- struct cl_attr *attr = vvp_env_thread_attr(env);
- struct cl_object *obj = io->ci_obj;
- struct vvp_page *vpg = cl_object_page_slice(obj, pg);
- loff_t offset = cl_offset(obj, vvp_index(vpg));
- int result;
-
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
- if (result == 0) {
- /*
- * If are writing to a new page, no need to read old data.
- * The extent locking will have updated the KMS, and for our
- * purposes here we can treat it like i_size.
- */
- if (attr->cat_kms <= offset) {
- char *kaddr = kmap_atomic(vpg->vpg_page);
-
- memset(kaddr, 0, cl_page_size(obj));
- kunmap_atomic(kaddr);
- } else if (vpg->vpg_defer_uptodate) {
- vpg->vpg_ra_used = 1;
- } else {
- result = ll_page_sync_io(env, io, pg, CRT_READ);
- }
- }
- return result;
-}
-
-static int ll_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int flags,
- struct page **pagep, void **fsdata)
-{
- struct ll_cl_context *lcc;
- const struct lu_env *env = NULL;
- struct cl_io *io;
- struct cl_page *page = NULL;
- struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *vmpage = NULL;
- unsigned int from = pos & (PAGE_SIZE - 1);
- unsigned int to = from + len;
- int result = 0;
-
- CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
-
- lcc = ll_cl_find(file);
- if (!lcc) {
- io = NULL;
- result = -EIO;
- goto out;
- }
-
- env = lcc->lcc_env;
- io = lcc->lcc_io;
-
- /* To avoid deadlock, try to lock page first. */
- vmpage = grab_cache_page_nowait(mapping, index);
- if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_page_list *plist = &vio->u.write.vui_queue;
-
- /* if the page is already in dirty cache, we have to commit
- * the pages right now; otherwise, it may cause deadlock
- * because it holds page lock of a dirty page and request for
- * more grants. It's okay for the dirty page to be the first
- * one in commit page list, though.
- */
- if (vmpage && plist->pl_nr > 0) {
- unlock_page(vmpage);
- put_page(vmpage);
- vmpage = NULL;
- }
-
- /* commit pages and then wait for page lock */
- result = vvp_io_write_commit(env, io);
- if (result < 0)
- goto out;
-
- if (!vmpage) {
- vmpage = grab_cache_page_write_begin(mapping, index,
- flags);
- if (!vmpage) {
- result = -ENOMEM;
- goto out;
- }
- }
- }
-
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- result = PTR_ERR(page);
- goto out;
- }
-
- lcc->lcc_page = page;
- lu_ref_add(&page->cp_reference, "cl_io", io);
-
- cl_page_assume(env, io, page);
- if (!PageUptodate(vmpage)) {
- /*
- * We're completely overwriting an existing page,
- * so _don't_ set it up to date until commit_write
- */
- if (from == 0 && to == PAGE_SIZE) {
- CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
- POISON_PAGE(vmpage, 0x11);
- } else {
- /* TODO: can be optimized at OSC layer to check if it
- * is a lockless IO. In that case, it's not necessary
- * to read the data.
- */
- result = ll_prepare_partial_page(env, io, page);
- if (result == 0)
- SetPageUptodate(vmpage);
- }
- }
- if (result < 0)
- cl_page_unassume(env, io, page);
-out:
- if (result < 0) {
- if (vmpage) {
- unlock_page(vmpage);
- put_page(vmpage);
- }
- if (!IS_ERR_OR_NULL(page)) {
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- if (io)
- io->ci_result = result;
- } else {
- *pagep = vmpage;
- *fsdata = lcc;
- }
- return result;
-}
-
-static int ll_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct page *vmpage, void *fsdata)
-{
- struct ll_cl_context *lcc = fsdata;
- const struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio;
- struct cl_page *page;
- unsigned int from = pos & (PAGE_SIZE - 1);
- bool unplug = false;
- int result = 0;
-
- put_page(vmpage);
-
- env = lcc->lcc_env;
- page = lcc->lcc_page;
- io = lcc->lcc_io;
- vio = vvp_env_io(env);
-
- LASSERT(cl_page_is_owned(page, io));
- if (copied > 0) {
- struct cl_page_list *plist = &vio->u.write.vui_queue;
-
- lcc->lcc_page = NULL; /* page will be queued */
-
- /* Add it into write queue */
- cl_page_list_add(plist, page);
- if (plist->pl_nr == 1) /* first page */
- vio->u.write.vui_from = from;
- else
- LASSERT(from == 0);
- vio->u.write.vui_to = from + copied;
-
- /*
- * To address the deadlock in balance_dirty_pages() where
- * this dirty page may be written back in the same thread.
- */
- if (PageDirty(vmpage))
- unplug = true;
-
- /* We may have one full RPC, commit it soon */
- if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
- unplug = true;
-
- CL_PAGE_DEBUG(D_VFSTRACE, env, page,
- "queued page: %d.\n", plist->pl_nr);
- } else {
- cl_page_disown(env, io, page);
-
- lcc->lcc_page = NULL;
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
-
- /* page list is not contiguous now, commit it now */
- unplug = true;
- }
-
- if (unplug ||
- file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
- result = vvp_io_write_commit(env, io);
-
- if (result < 0)
- io->ci_result = result;
- return result >= 0 ? copied : result;
-}
-
-#ifdef CONFIG_MIGRATION
-static int ll_migratepage(struct address_space *mapping,
- struct page *newpage, struct page *page,
- enum migrate_mode mode
- )
-{
- /* Always fail page migration until we have a proper implementation */
- return -EIO;
-}
-#endif
-
-const struct address_space_operations ll_aops = {
- .readpage = ll_readpage,
- .direct_IO = ll_direct_IO_26,
- .writepage = ll_writepage,
- .writepages = ll_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
- .write_begin = ll_write_begin,
- .write_end = ll_write_end,
- .invalidatepage = ll_invalidatepage,
- .releasepage = (void *)ll_releasepage,
-#ifdef CONFIG_MIGRATION
- .migratepage = ll_migratepage,
-#endif
-};
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
deleted file mode 100644
index 155ce3cf6f60..000000000000
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-#define SA_OMITTED_ENTRY_MAX 8ULL
-
-enum se_stat {
- /** negative values are for error cases */
- SA_ENTRY_INIT = 0, /** init entry */
- SA_ENTRY_SUCC = 1, /** stat succeed */
- SA_ENTRY_INVA = 2, /** invalid entry */
-};
-
-/*
- * sa_entry is not refcounted: statahead thread allocates it and do async stat,
- * and in async stat callback ll_statahead_interpret() will add it into
- * sai_interim_entries, later statahead thread will call sa_handle_callback() to
- * instantiate entry and move it into sai_entries, and then only scanner process
- * can access and free it.
- */
-struct sa_entry {
- /* link into sai_interim_entries or sai_entries */
- struct list_head se_list;
- /* link into sai hash table locally */
- struct list_head se_hash;
- /* entry index in the sai */
- __u64 se_index;
- /* low layer ldlm lock handle */
- __u64 se_handle;
- /* entry status */
- enum se_stat se_state;
- /* entry size, contains name */
- int se_size;
- /* pointer to async getattr enqueue info */
- struct md_enqueue_info *se_minfo;
- /* pointer to the async getattr request */
- struct ptlrpc_request *se_req;
- /* pointer to the target inode */
- struct inode *se_inode;
- /* entry name */
- struct qstr se_qstr;
- /* entry fid */
- struct lu_fid se_fid;
-};
-
-static unsigned int sai_generation;
-static DEFINE_SPINLOCK(sai_generation_lock);
-
-/* sa_entry is ready to use */
-static inline int sa_ready(struct sa_entry *entry)
-{
- smp_rmb();
- return (entry->se_state != SA_ENTRY_INIT);
-}
-
-/* hash value to put in sai_cache */
-static inline int sa_hash(int val)
-{
- return val & LL_SA_CACHE_MASK;
-}
-
-/* hash entry into sai_cache */
-static inline void
-sa_rehash(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- int i = sa_hash(entry->se_qstr.hash);
-
- spin_lock(&sai->sai_cache_lock[i]);
- list_add_tail(&entry->se_hash, &sai->sai_cache[i]);
- spin_unlock(&sai->sai_cache_lock[i]);
-}
-
-/*
- * Remove entry from SA table.
- */
-static inline void
-sa_unhash(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- int i = sa_hash(entry->se_qstr.hash);
-
- spin_lock(&sai->sai_cache_lock[i]);
- list_del_init(&entry->se_hash);
- spin_unlock(&sai->sai_cache_lock[i]);
-}
-
-static inline int agl_should_run(struct ll_statahead_info *sai,
- struct inode *inode)
-{
- return (inode && S_ISREG(inode->i_mode) && sai->sai_agl_valid);
-}
-
-/* statahead window is full */
-static inline int sa_sent_full(struct ll_statahead_info *sai)
-{
- return atomic_read(&sai->sai_cache_count) >= sai->sai_max;
-}
-
-/* got async stat replies */
-static inline int sa_has_callback(struct ll_statahead_info *sai)
-{
- return !list_empty(&sai->sai_interim_entries);
-}
-
-static inline int agl_list_empty(struct ll_statahead_info *sai)
-{
- return list_empty(&sai->sai_agls);
-}
-
-/**
- * (1) hit ratio less than 80%
- * or
- * (2) consecutive miss more than 8
- * then means low hit.
- */
-static inline int sa_low_hit(struct ll_statahead_info *sai)
-{
- return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) ||
- (sai->sai_consecutive_miss > 8));
-}
-
-/*
- * if the given index is behind of statahead window more than
- * SA_OMITTED_ENTRY_MAX, then it is old.
- */
-static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
-{
- return ((__u64)sai->sai_max + index + SA_OMITTED_ENTRY_MAX <
- sai->sai_index);
-}
-
-/* allocate sa_entry and hash it to allow scanner process to find it */
-static struct sa_entry *
-sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
- const char *name, int len, const struct lu_fid *fid)
-{
- struct ll_inode_info *lli;
- struct sa_entry *entry;
- int entry_size;
- char *dname;
-
- entry_size = sizeof(struct sa_entry) + (len & ~3) + 4;
- entry = kzalloc(entry_size, GFP_NOFS);
- if (unlikely(!entry))
- return ERR_PTR(-ENOMEM);
-
- CDEBUG(D_READA, "alloc sa entry %.*s(%p) index %llu\n",
- len, name, entry, index);
-
- entry->se_index = index;
- entry->se_state = SA_ENTRY_INIT;
- entry->se_size = entry_size;
- dname = (char *)entry + sizeof(struct sa_entry);
- memcpy(dname, name, len);
- dname[len] = 0;
-
- entry->se_qstr.hash = full_name_hash(parent, name, len);
- entry->se_qstr.len = len;
- entry->se_qstr.name = dname;
- entry->se_fid = *fid;
-
- lli = ll_i2info(sai->sai_dentry->d_inode);
- spin_lock(&lli->lli_sa_lock);
- INIT_LIST_HEAD(&entry->se_list);
- sa_rehash(sai, entry);
- spin_unlock(&lli->lli_sa_lock);
-
- atomic_inc(&sai->sai_cache_count);
-
- return entry;
-}
-
-/* free sa_entry, which should have been unhashed and not in any list */
-static void sa_free(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
- entry->se_qstr.len, entry->se_qstr.name, entry,
- entry->se_index);
-
- LASSERT(list_empty(&entry->se_list));
- LASSERT(list_empty(&entry->se_hash));
-
- kfree(entry);
- atomic_dec(&sai->sai_cache_count);
-}
-
-/*
- * find sa_entry by name, used by directory scanner, lock is not needed because
- * only scanner can remove the entry from cache.
- */
-static struct sa_entry *
-sa_get(struct ll_statahead_info *sai, const struct qstr *qstr)
-{
- struct sa_entry *entry;
- int i = sa_hash(qstr->hash);
-
- list_for_each_entry(entry, &sai->sai_cache[i], se_hash) {
- if (entry->se_qstr.hash == qstr->hash &&
- entry->se_qstr.len == qstr->len &&
- memcmp(entry->se_qstr.name, qstr->name, qstr->len) == 0)
- return entry;
- }
- return NULL;
-}
-
-/* unhash and unlink sa_entry, and then free it */
-static inline void
-sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
-
- LASSERT(!list_empty(&entry->se_hash));
- LASSERT(!list_empty(&entry->se_list));
- LASSERT(sa_ready(entry));
-
- sa_unhash(sai, entry);
-
- spin_lock(&lli->lli_sa_lock);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
- if (entry->se_inode)
- iput(entry->se_inode);
-
- sa_free(sai, entry);
-}
-
-/* called by scanner after use, sa_entry will be killed */
-static void
-sa_put(struct ll_statahead_info *sai, struct sa_entry *entry, struct ll_inode_info *lli)
-{
- struct sa_entry *tmp, *next;
-
- if (entry && entry->se_state == SA_ENTRY_SUCC) {
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
-
- sai->sai_hit++;
- sai->sai_consecutive_miss = 0;
- sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
- } else {
- sai->sai_miss++;
- sai->sai_consecutive_miss++;
- }
-
- if (entry)
- sa_kill(sai, entry);
-
- /*
- * kill old completed entries, only scanner process does this, no need
- * to lock
- */
- list_for_each_entry_safe(tmp, next, &sai->sai_entries, se_list) {
- if (!is_omitted_entry(sai, tmp->se_index))
- break;
- sa_kill(sai, tmp);
- }
-
- spin_lock(&lli->lli_sa_lock);
- if (sai->sai_task)
- wake_up_process(sai->sai_task);
- spin_unlock(&lli->lli_sa_lock);
-
-}
-
-/*
- * update state and sort add entry to sai_entries by index, return true if
- * scanner is waiting on this entry.
- */
-static bool
-__sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
-{
- struct list_head *pos = &sai->sai_entries;
- __u64 index = entry->se_index;
- struct sa_entry *se;
-
- LASSERT(!sa_ready(entry));
- LASSERT(list_empty(&entry->se_list));
-
- list_for_each_entry_reverse(se, &sai->sai_entries, se_list) {
- if (se->se_index < entry->se_index) {
- pos = &se->se_list;
- break;
- }
- }
- list_add(&entry->se_list, pos);
- entry->se_state = ret < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC;
-
- return (index == sai->sai_index_wait);
-}
-
-/*
- * release resources used in async stat RPC, update entry state and wakeup if
- * scanner process it waiting on this entry.
- */
-static void
-sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
- struct md_enqueue_info *minfo = entry->se_minfo;
- struct ptlrpc_request *req = entry->se_req;
- bool wakeup;
-
- /* release resources used in RPC */
- if (minfo) {
- entry->se_minfo = NULL;
- ll_intent_release(&minfo->mi_it);
- iput(minfo->mi_dir);
- kfree(minfo);
- }
-
- if (req) {
- entry->se_req = NULL;
- ptlrpc_req_finished(req);
- }
-
- spin_lock(&lli->lli_sa_lock);
- wakeup = __sa_make_ready(sai, entry, ret);
- spin_unlock(&lli->lli_sa_lock);
-
- if (wakeup)
- wake_up(&sai->sai_waitq);
-}
-
-/* Insert inode into the list of sai_agls. */
-static void ll_agl_add(struct ll_statahead_info *sai,
- struct inode *inode, int index)
-{
- struct ll_inode_info *child = ll_i2info(inode);
- struct ll_inode_info *parent = ll_i2info(sai->sai_dentry->d_inode);
- int added = 0;
-
- spin_lock(&child->lli_agl_lock);
- if (child->lli_agl_index == 0) {
- child->lli_agl_index = index;
- spin_unlock(&child->lli_agl_lock);
-
- LASSERT(list_empty(&child->lli_agl_list));
-
- igrab(inode);
- spin_lock(&parent->lli_agl_lock);
- if (list_empty(&sai->sai_agls))
- added = 1;
- list_add_tail(&child->lli_agl_list, &sai->sai_agls);
- spin_unlock(&parent->lli_agl_lock);
- } else {
- spin_unlock(&child->lli_agl_lock);
- }
-
- if (added > 0)
- wake_up_process(sai->sai_agl_task);
-}
-
-/* allocate sai */
-static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dentry->d_inode);
- struct ll_statahead_info *sai;
- int i;
-
- sai = kzalloc(sizeof(*sai), GFP_NOFS);
- if (!sai)
- return NULL;
-
- sai->sai_dentry = dget(dentry);
- atomic_set(&sai->sai_refcount, 1);
-
- sai->sai_max = LL_SA_RPC_MIN;
- sai->sai_index = 1;
- init_waitqueue_head(&sai->sai_waitq);
-
- INIT_LIST_HEAD(&sai->sai_interim_entries);
- INIT_LIST_HEAD(&sai->sai_entries);
- INIT_LIST_HEAD(&sai->sai_agls);
-
- for (i = 0; i < LL_SA_CACHE_SIZE; i++) {
- INIT_LIST_HEAD(&sai->sai_cache[i]);
- spin_lock_init(&sai->sai_cache_lock[i]);
- }
- atomic_set(&sai->sai_cache_count, 0);
-
- spin_lock(&sai_generation_lock);
- lli->lli_sa_generation = ++sai_generation;
- if (unlikely(!sai_generation))
- lli->lli_sa_generation = ++sai_generation;
- spin_unlock(&sai_generation_lock);
-
- return sai;
-}
-
-/* free sai */
-static inline void ll_sai_free(struct ll_statahead_info *sai)
-{
- LASSERT(sai->sai_dentry);
- dput(sai->sai_dentry);
- kfree(sai);
-}
-
-/*
- * take refcount of sai if sai for @dir exists, which means statahead is on for
- * this directory.
- */
-static inline struct ll_statahead_info *ll_sai_get(struct inode *dir)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
-
- spin_lock(&lli->lli_sa_lock);
- sai = lli->lli_sai;
- if (sai)
- atomic_inc(&sai->sai_refcount);
- spin_unlock(&lli->lli_sa_lock);
-
- return sai;
-}
-
-/*
- * put sai refcount after use, if refcount reaches zero, free sai and sa_entries
- * attached to it.
- */
-static void ll_sai_put(struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
-
- if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) {
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
- struct sa_entry *entry, *next;
-
- lli->lli_sai = NULL;
- spin_unlock(&lli->lli_sa_lock);
-
- LASSERT(sai->sai_task == NULL);
- LASSERT(sai->sai_agl_task == NULL);
- LASSERT(sai->sai_sent == sai->sai_replied);
- LASSERT(!sa_has_callback(sai));
-
- list_for_each_entry_safe(entry, next, &sai->sai_entries,
- se_list)
- sa_kill(sai, entry);
-
- LASSERT(atomic_read(&sai->sai_cache_count) == 0);
- LASSERT(list_empty(&sai->sai_agls));
-
- ll_sai_free(sai);
- atomic_dec(&sbi->ll_sa_running);
- }
-}
-
-/* Do NOT forget to drop inode refcount when into sai_agls. */
-static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- __u64 index = lli->lli_agl_index;
- int rc;
-
- LASSERT(list_empty(&lli->lli_agl_list));
-
- /* AGL maybe fall behind statahead with one entry */
- if (is_omitted_entry(sai, index + 1)) {
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- /* Someone is in glimpse (sync or async), do nothing. */
- rc = down_write_trylock(&lli->lli_glimpse_sem);
- if (rc == 0) {
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- /*
- * Someone triggered glimpse within 1 sec before.
- * 1) The former glimpse succeeded with glimpse lock granted by OST, and
- * if the lock is still cached on client, AGL needs to do nothing. If
- * it is cancelled by other client, AGL maybe cannot obtain new lock
- * for no glimpse callback triggered by AGL.
- * 2) The former glimpse succeeded, but OST did not grant glimpse lock.
- * Under such case, it is quite possible that the OST will not grant
- * glimpse lock for AGL also.
- * 3) The former glimpse failed, compared with other two cases, it is
- * relative rare. AGL can ignore such case, and it will not muchly
- * affect the performance.
- */
- if (lli->lli_glimpse_time != 0 &&
- time_before(cfs_time_shift(-1), lli->lli_glimpse_time)) {
- up_write(&lli->lli_glimpse_sem);
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- CDEBUG(D_READA, "Handling (init) async glimpse: inode = "
- DFID ", idx = %llu\n", PFID(&lli->lli_fid), index);
-
- cl_agl(inode);
- lli->lli_agl_index = 0;
- lli->lli_glimpse_time = cfs_time_current();
- up_write(&lli->lli_glimpse_sem);
-
- CDEBUG(D_READA, "Handled (init) async glimpse: inode= "
- DFID ", idx = %llu, rc = %d\n",
- PFID(&lli->lli_fid), index, rc);
-
- iput(inode);
-}
-
-/*
- * prepare inode for sa entry, add it into agl list, now sa_entry is ready
- * to be used by scanner process.
- */
-static void sa_instantiate(struct ll_statahead_info *sai,
- struct sa_entry *entry)
-{
- struct inode *dir = sai->sai_dentry->d_inode;
- struct inode *child;
- struct md_enqueue_info *minfo;
- struct lookup_intent *it;
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc = 0;
-
- LASSERT(entry->se_handle != 0);
-
- minfo = entry->se_minfo;
- it = &minfo->mi_it;
- req = entry->se_req;
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out;
- }
-
- child = entry->se_inode;
- if (child) {
- /* revalidate; unlinked and re-created with the same name */
- if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) {
- entry->se_inode = NULL;
- iput(child);
- child = NULL;
- }
- }
-
- it->it_lock_handle = entry->se_handle;
- rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
- if (rc != 1) {
- rc = -EAGAIN;
- goto out;
- }
-
- rc = ll_prep_inode(&child, req, dir->i_sb, it);
- if (rc)
- goto out;
-
- CDEBUG(D_READA, "%s: setting %.*s" DFID " l_data to inode %p\n",
- ll_get_fsname(child->i_sb, NULL, 0),
- entry->se_qstr.len, entry->se_qstr.name,
- PFID(ll_inode2fid(child)), child);
- ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
-
- entry->se_inode = child;
-
- if (agl_should_run(sai, child))
- ll_agl_add(sai, child, entry->se_index);
-
-out:
- /*
- * sa_make_ready() will drop ldlm ibits lock refcount by calling
- * ll_intent_drop_lock() in spite of failures. Do not worry about
- * calling ll_intent_drop_lock() more than once.
- */
- sa_make_ready(sai, entry, rc);
-}
-
-/* once there are async stat replies, instantiate sa_entry from replies */
-static void sa_handle_callback(struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli;
-
- lli = ll_i2info(sai->sai_dentry->d_inode);
-
- while (sa_has_callback(sai)) {
- struct sa_entry *entry;
-
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(!sa_has_callback(sai))) {
- spin_unlock(&lli->lli_sa_lock);
- break;
- }
- entry = list_entry(sai->sai_interim_entries.next,
- struct sa_entry, se_list);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
- sa_instantiate(sai, entry);
- }
-}
-
-/*
- * callback for async stat, because this is called in ptlrpcd context, we only
- * put sa_entry in sai_cb_entries list, and let sa_handle_callback() to really
- * prepare inode and instantiate sa_entry later.
- */
-static int ll_statahead_interpret(struct ptlrpc_request *req,
- struct md_enqueue_info *minfo, int rc)
-{
- struct lookup_intent *it = &minfo->mi_it;
- struct inode *dir = minfo->mi_dir;
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata;
- __u64 handle = 0;
-
- if (it_disposition(it, DISP_LOOKUP_NEG))
- rc = -ENOENT;
-
- /*
- * because statahead thread will wait for all inflight RPC to finish,
- * sai should be always valid, no need to refcount
- */
- LASSERT(sai);
- LASSERT(entry);
-
- CDEBUG(D_READA, "sa_entry %.*s rc %d\n",
- entry->se_qstr.len, entry->se_qstr.name, rc);
-
- if (rc) {
- ll_intent_release(it);
- iput(dir);
- kfree(minfo);
- } else {
- /*
- * release ibits lock ASAP to avoid deadlock when statahead
- * thread enqueues lock on parent in readdir and another
- * process enqueues lock on child with parent lock held, eg.
- * unlink.
- */
- handle = it->it_lock_handle;
- ll_intent_drop_lock(it);
- }
-
- spin_lock(&lli->lli_sa_lock);
- if (rc) {
- if (__sa_make_ready(sai, entry, rc))
- wake_up(&sai->sai_waitq);
- } else {
- int first = 0;
- entry->se_minfo = minfo;
- entry->se_req = ptlrpc_request_addref(req);
- /*
- * Release the async ibits lock ASAP to avoid deadlock
- * when statahead thread tries to enqueue lock on parent
- * for readpage and other tries to enqueue lock on child
- * with parent's lock held, for example: unlink.
- */
- entry->se_handle = handle;
- if (!sa_has_callback(sai))
- first = 1;
-
- list_add_tail(&entry->se_list, &sai->sai_interim_entries);
-
- if (first && sai->sai_task)
- wake_up_process(sai->sai_task);
- }
- sai->sai_replied++;
-
- spin_unlock(&lli->lli_sa_lock);
-
- return rc;
-}
-
-/* finish async stat RPC arguments */
-static void sa_fini_data(struct md_enqueue_info *minfo)
-{
- iput(minfo->mi_dir);
- kfree(minfo);
-}
-
-/**
- * prepare arguments for async stat RPC.
- */
-static struct md_enqueue_info *
-sa_prep_data(struct inode *dir, struct inode *child, struct sa_entry *entry)
-{
- struct md_enqueue_info *minfo;
- struct ldlm_enqueue_info *einfo;
- struct md_op_data *op_data;
-
- minfo = kzalloc(sizeof(*minfo), GFP_NOFS);
- if (!minfo)
- return ERR_PTR(-ENOMEM);
-
- op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- kfree(minfo);
- return (struct md_enqueue_info *)op_data;
- }
-
- if (!child)
- op_data->op_fid2 = entry->se_fid;
-
- minfo->mi_it.it_op = IT_GETATTR;
- minfo->mi_dir = igrab(dir);
- minfo->mi_cb = ll_statahead_interpret;
- minfo->mi_cbdata = entry;
-
- einfo = &minfo->mi_einfo;
- einfo->ei_type = LDLM_IBITS;
- einfo->ei_mode = it_to_lock_mode(&minfo->mi_it);
- einfo->ei_cb_bl = ll_md_blocking_ast;
- einfo->ei_cb_cp = ldlm_completion_ast;
- einfo->ei_cb_gl = NULL;
- einfo->ei_cbdata = NULL;
-
- return minfo;
-}
-
-/* async stat for file not found in dcache */
-static int sa_lookup(struct inode *dir, struct sa_entry *entry)
-{
- struct md_enqueue_info *minfo;
- int rc;
-
- minfo = sa_prep_data(dir, NULL, entry);
- if (IS_ERR(minfo))
- return PTR_ERR(minfo);
-
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
- if (rc)
- sa_fini_data(minfo);
-
- return rc;
-}
-
-/**
- * async stat for file found in dcache, similar to .revalidate
- *
- * \retval 1 dentry valid, no RPC sent
- * \retval 0 dentry invalid, will send async stat RPC
- * \retval negative number upon error
- */
-static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
- struct dentry *dentry)
-{
- struct inode *inode = d_inode(dentry);
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle = 0 };
- struct md_enqueue_info *minfo;
- int rc;
-
- if (unlikely(!inode))
- return 1;
-
- if (d_mountpoint(dentry))
- return 1;
-
- entry->se_inode = igrab(inode);
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),
- NULL);
- if (rc == 1) {
- entry->se_handle = it.it_lock_handle;
- ll_intent_release(&it);
- return 1;
- }
-
- minfo = sa_prep_data(dir, inode, entry);
- if (IS_ERR(minfo)) {
- entry->se_inode = NULL;
- iput(inode);
- return PTR_ERR(minfo);
- }
-
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
- if (rc) {
- entry->se_inode = NULL;
- iput(inode);
- sa_fini_data(minfo);
- }
-
- return rc;
-}
-
-/* async stat for file with @name */
-static void sa_statahead(struct dentry *parent, const char *name, int len,
- const struct lu_fid *fid)
-{
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct dentry *dentry = NULL;
- struct sa_entry *entry;
- int rc;
-
- entry = sa_alloc(parent, sai, sai->sai_index, name, len, fid);
- if (IS_ERR(entry))
- return;
-
- dentry = d_lookup(parent, &entry->se_qstr);
- if (!dentry) {
- rc = sa_lookup(dir, entry);
- } else {
- rc = sa_revalidate(dir, entry, dentry);
- if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
- ll_agl_add(sai, d_inode(dentry), entry->se_index);
- }
-
- if (dentry)
- dput(dentry);
-
- if (rc)
- sa_make_ready(sai, entry, rc);
- else
- sai->sai_sent++;
-
- sai->sai_index++;
-}
-
-/* async glimpse (agl) thread main function */
-static int ll_agl_thread(void *arg)
-{
- struct dentry *parent = arg;
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *plli = ll_i2info(dir);
- struct ll_inode_info *clli;
- /* We already own this reference, so it is safe to take it without a lock. */
- struct ll_statahead_info *sai = plli->lli_sai;
-
- CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n",
- sai, parent);
-
- while (!kthread_should_stop()) {
-
- spin_lock(&plli->lli_agl_lock);
- /* The statahead thread maybe help to process AGL entries,
- * so check whether list empty again.
- */
- if (!list_empty(&sai->sai_agls)) {
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- ll_agl_trigger(&clli->lli_vfs_inode, sai);
- } else {
- spin_unlock(&plli->lli_agl_lock);
- }
-
- set_current_state(TASK_IDLE);
- if (list_empty(&sai->sai_agls) &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- }
-
- spin_lock(&plli->lli_agl_lock);
- sai->sai_agl_valid = 0;
- while (!list_empty(&sai->sai_agls)) {
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- clli->lli_agl_index = 0;
- iput(&clli->lli_vfs_inode);
- spin_lock(&plli->lli_agl_lock);
- }
- spin_unlock(&plli->lli_agl_lock);
- CDEBUG(D_READA, "agl thread stopped: sai %p, parent %pd\n",
- sai, parent);
- ll_sai_put(sai);
- return 0;
-}
-
-/* start agl thread */
-static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
-{
- struct ll_inode_info *plli;
- struct task_struct *task;
-
- CDEBUG(D_READA, "start agl thread: sai %p, parent %pd\n",
- sai, parent);
-
- plli = ll_i2info(d_inode(parent));
- task = kthread_create(ll_agl_thread, parent, "ll_agl_%u",
- plli->lli_opendir_pid);
- if (IS_ERR(task)) {
- CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task));
- return;
- }
-
- sai->sai_agl_task = task;
- atomic_inc(&ll_i2sbi(d_inode(parent))->ll_agl_total);
- spin_lock(&plli->lli_agl_lock);
- sai->sai_agl_valid = 1;
- spin_unlock(&plli->lli_agl_lock);
- /* Get an extra reference that the thread holds */
- ll_sai_get(d_inode(parent));
-
- wake_up_process(task);
-}
-
-/* statahead thread main function */
-static int ll_statahead_thread(void *arg)
-{
- struct dentry *parent = arg;
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct page *page = NULL;
- __u64 pos = 0;
- int first = 0;
- int rc = 0;
- struct md_op_data *op_data;
-
- CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n",
- sai, parent);
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
-
- while (pos != MDS_DIR_END_OFF && sai->sai_task) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
-
- sai->sai_in_readpage = 1;
- page = ll_get_dir_page(dir, op_data, pos);
- sai->sai_in_readpage = 0;
- if (IS_ERR(page)) {
- rc = PTR_ERR(page);
- CDEBUG(D_READA, "error reading dir " DFID " at %llu/%llu: opendir_pid = %u: rc = %d\n",
- PFID(ll_inode2fid(dir)), pos, sai->sai_index,
- lli->lli_opendir_pid, rc);
- break;
- }
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp);
- ent && sai->sai_task && !sa_low_hit(sai);
- ent = lu_dirent_next(ent)) {
- struct lu_fid fid;
- __u64 hash;
- int namelen;
- char *name;
-
- hash = le64_to_cpu(ent->lde_hash);
- if (unlikely(hash < pos))
- /*
- * Skip until we find target hash value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (unlikely(namelen == 0))
- /*
- * Skip dummy record.
- */
- continue;
-
- name = ent->lde_name;
- if (name[0] == '.') {
- if (namelen == 1) {
- /*
- * skip "."
- */
- continue;
- } else if (name[1] == '.' && namelen == 2) {
- /*
- * skip ".."
- */
- continue;
- } else if (!sai->sai_ls_all) {
- /*
- * skip hidden files.
- */
- sai->sai_skip_hidden++;
- continue;
- }
- }
-
- /*
- * don't stat-ahead first entry.
- */
- if (unlikely(++first == 1))
- continue;
-
- fid_le_to_cpu(&fid, &ent->lde_fid);
-
- do {
- sa_handle_callback(sai);
-
- spin_lock(&lli->lli_agl_lock);
- while (sa_sent_full(sai) &&
- !agl_list_empty(sai)) {
- struct ll_inode_info *clli;
-
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info,
- lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&lli->lli_agl_lock);
-
- ll_agl_trigger(&clli->lli_vfs_inode,
- sai);
-
- spin_lock(&lli->lli_agl_lock);
- }
- spin_unlock(&lli->lli_agl_lock);
-
- set_current_state(TASK_IDLE);
- if (sa_sent_full(sai) &&
- !sa_has_callback(sai) &&
- agl_list_empty(sai) &&
- sai->sai_task)
- /* wait for spare statahead window */
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (sa_sent_full(sai) && sai->sai_task);
-
- sa_statahead(parent, name, namelen, &fid);
- }
-
- pos = le64_to_cpu(dp->ldp_hash_end);
- ll_release_page(dir, page,
- le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
-
- if (sa_low_hit(sai)) {
- rc = -EFAULT;
- atomic_inc(&sbi->ll_sa_wrong);
- CDEBUG(D_READA, "Statahead for dir " DFID " hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread: pid %d\n",
- PFID(&lli->lli_fid), sai->sai_hit,
- sai->sai_miss, sai->sai_sent,
- sai->sai_replied, current_pid());
- break;
- }
- }
- ll_finish_md_op_data(op_data);
-
- if (rc < 0) {
- spin_lock(&lli->lli_sa_lock);
- sai->sai_task = NULL;
- lli->lli_sa_enabled = 0;
- spin_unlock(&lli->lli_sa_lock);
- }
-
- /*
- * statahead is finished, but statahead entries need to be cached, wait
- * for file release to stop me.
- */
- while (sai->sai_task) {
- sa_handle_callback(sai);
-
- set_current_state(TASK_IDLE);
- if (!sa_has_callback(sai) &&
- sai->sai_task)
- schedule();
- __set_current_state(TASK_RUNNING);
- }
-out:
- if (sai->sai_agl_task) {
- kthread_stop(sai->sai_agl_task);
-
- CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n",
- sai, (unsigned int)sai->sai_agl_task->pid);
- sai->sai_agl_task = NULL;
- }
- /*
- * wait for inflight statahead RPCs to finish, and then we can free sai
- * safely because statahead RPC will access sai data
- */
- while (sai->sai_sent != sai->sai_replied) {
- /* in case we're not woken up, timeout wait */
- schedule_timeout_idle(HZ>>3);
- }
-
- /* release resources held by statahead RPCs */
- sa_handle_callback(sai);
-
- CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n",
- sai, parent);
-
- spin_lock(&lli->lli_sa_lock);
- sai->sai_task = NULL;
- spin_unlock(&lli->lli_sa_lock);
-
- wake_up(&sai->sai_waitq);
- ll_sai_put(sai);
-
- do_exit(rc);
-}
-
-/* authorize opened dir handle @key to statahead */
-void ll_authorize_statahead(struct inode *dir, void *key)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
-
- spin_lock(&lli->lli_sa_lock);
- if (!lli->lli_opendir_key && !lli->lli_sai) {
- /*
- * if lli_sai is not NULL, it means previous statahead is not
- * finished yet, we'd better not start a new statahead for now.
- */
- LASSERT(!lli->lli_opendir_pid);
- lli->lli_opendir_key = key;
- lli->lli_opendir_pid = current_pid();
- lli->lli_sa_enabled = 1;
- }
- spin_unlock(&lli->lli_sa_lock);
-}
-
-/*
- * deauthorize opened dir handle @key to statahead, but statahead thread may
- * still be running, notify it to quit.
- */
-void ll_deauthorize_statahead(struct inode *dir, void *key)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai;
-
- LASSERT(lli->lli_opendir_key == key);
- LASSERT(lli->lli_opendir_pid);
-
- CDEBUG(D_READA, "deauthorize statahead for " DFID "\n",
- PFID(&lli->lli_fid));
-
- spin_lock(&lli->lli_sa_lock);
- lli->lli_opendir_key = NULL;
- lli->lli_opendir_pid = 0;
- lli->lli_sa_enabled = 0;
- sai = lli->lli_sai;
- if (sai && sai->sai_task) {
- /*
- * statahead thread may not quit yet because it needs to cache
- * entries, now it's time to tell it to quit.
- */
- wake_up_process(sai->sai_task);
- sai->sai_task = NULL;
- }
- spin_unlock(&lli->lli_sa_lock);
-}
-
-enum {
- /**
- * not first dirent, or is "."
- */
- LS_NOT_FIRST_DE = 0,
- /**
- * the first non-hidden dirent
- */
- LS_FIRST_DE,
- /**
- * the first hidden dirent, that is "."
- */
- LS_FIRST_DOT_DE
-};
-
-/* file is first dirent under @dir */
-static int is_first_dirent(struct inode *dir, struct dentry *dentry)
-{
- const struct qstr *target = &dentry->d_name;
- struct md_op_data *op_data;
- struct page *page;
- __u64 pos = 0;
- int dot_de;
- int rc = LS_NOT_FIRST_DE;
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
- /**
- * FIXME choose the start offset of the readdir
- */
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
-
- page = ll_get_dir_page(dir, op_data, pos);
-
- while (1) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
-
- if (IS_ERR(page)) {
- struct ll_inode_info *lli = ll_i2info(dir);
-
- rc = PTR_ERR(page);
- CERROR("%s: error reading dir " DFID " at %llu: opendir_pid = %u : rc = %d\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)), pos,
- lli->lli_opendir_pid, rc);
- break;
- }
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent;
- ent = lu_dirent_next(ent)) {
- __u64 hash;
- int namelen;
- char *name;
-
- hash = le64_to_cpu(ent->lde_hash);
- /* The ll_get_dir_page() can return any page containing
- * the given hash which may be not the start hash.
- */
- if (unlikely(hash < pos))
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (unlikely(namelen == 0))
- /*
- * skip dummy record.
- */
- continue;
-
- name = ent->lde_name;
- if (name[0] == '.') {
- if (namelen == 1)
- /*
- * skip "."
- */
- continue;
- else if (name[1] == '.' && namelen == 2)
- /*
- * skip ".."
- */
- continue;
- else
- dot_de = 1;
- } else {
- dot_de = 0;
- }
-
- if (dot_de && target->name[0] != '.') {
- CDEBUG(D_READA, "%.*s skip hidden file %.*s\n",
- target->len, target->name,
- namelen, name);
- continue;
- }
-
- if (target->len != namelen ||
- memcmp(target->name, name, namelen) != 0)
- rc = LS_NOT_FIRST_DE;
- else if (!dot_de)
- rc = LS_FIRST_DE;
- else
- rc = LS_FIRST_DOT_DE;
-
- ll_release_page(dir, page, false);
- goto out;
- }
- pos = le64_to_cpu(dp->ldp_hash_end);
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- ll_release_page(dir, page, false);
- goto out;
- } else {
- /*
- * chain is exhausted
- * Normal case: continue to the next page.
- */
- ll_release_page(dir, page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- page = ll_get_dir_page(dir, op_data, pos);
- }
- }
-out:
- ll_finish_md_op_data(op_data);
- return rc;
-}
-
-/**
- * revalidate @dentryp from statahead cache
- *
- * \param[in] dir parent directory
- * \param[in] sai sai structure
- * \param[out] dentryp pointer to dentry which will be revalidated
- * \param[in] unplug unplug statahead window only (normally for negative
- * dentry)
- * \retval 1 on success, dentry is saved in @dentryp
- * \retval 0 if revalidation failed (no proper lock on client)
- * \retval negative number upon error
- */
-static int revalidate_statahead_dentry(struct inode *dir,
- struct ll_statahead_info *sai,
- struct dentry **dentryp,
- bool unplug)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct sa_entry *entry = NULL;
- struct ll_dentry_data *ldd;
- int rc = 0;
-
- if ((*dentryp)->d_name.name[0] == '.') {
- if (sai->sai_ls_all ||
- sai->sai_miss_hidden >= sai->sai_skip_hidden) {
- /*
- * Hidden dentry is the first one, or statahead
- * thread does not skip so many hidden dentries
- * before "sai_ls_all" enabled as below.
- */
- } else {
- if (!sai->sai_ls_all)
- /*
- * It maybe because hidden dentry is not
- * the first one, "sai_ls_all" was not
- * set, then "ls -al" missed. Enable
- * "sai_ls_all" for such case.
- */
- sai->sai_ls_all = 1;
-
- /*
- * Such "getattr" has been skipped before
- * "sai_ls_all" enabled as above.
- */
- sai->sai_miss_hidden++;
- return -EAGAIN;
- }
- }
-
- if (unplug) {
- rc = 1;
- goto out_unplug;
- }
-
- entry = sa_get(sai, &(*dentryp)->d_name);
- if (!entry) {
- rc = -EAGAIN;
- goto out_unplug;
- }
-
- /* if statahead is busy in readdir, help it do post-work */
- if (!sa_ready(entry) && sai->sai_in_readpage)
- sa_handle_callback(sai);
-
- if (!sa_ready(entry)) {
- spin_lock(&lli->lli_sa_lock);
- sai->sai_index_wait = entry->se_index;
- spin_unlock(&lli->lli_sa_lock);
- if (0 == wait_event_idle_timeout(sai->sai_waitq,
- sa_ready(entry), 30 * HZ)) {
- /*
- * entry may not be ready, so it may be used by inflight
- * statahead RPC, don't free it.
- */
- entry = NULL;
- rc = -EAGAIN;
- goto out_unplug;
- }
- }
-
- if (entry->se_state == SA_ENTRY_SUCC && entry->se_inode) {
- struct inode *inode = entry->se_inode;
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle = entry->se_handle };
- __u64 bits;
-
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
- ll_inode2fid(inode), &bits);
- if (rc == 1) {
- if (!(*dentryp)->d_inode) {
- struct dentry *alias;
-
- alias = ll_splice_alias(inode, *dentryp);
- if (IS_ERR(alias)) {
- ll_intent_release(&it);
- rc = PTR_ERR(alias);
- goto out_unplug;
- }
- *dentryp = alias;
- /**
- * statahead prepared this inode, transfer inode
- * refcount from sa_entry to dentry
- */
- entry->se_inode = NULL;
- } else if ((*dentryp)->d_inode != inode) {
- /* revalidate, but inode is recreated */
- CDEBUG(D_READA,
- "%s: stale dentry %pd inode " DFID ", statahead inode " DFID "\n",
- ll_get_fsname((*dentryp)->d_inode->i_sb,
- NULL, 0),
- *dentryp,
- PFID(ll_inode2fid((*dentryp)->d_inode)),
- PFID(ll_inode2fid(inode)));
- ll_intent_release(&it);
- rc = -ESTALE;
- goto out_unplug;
- }
-
- if ((bits & MDS_INODELOCK_LOOKUP) &&
- d_lustre_invalid(*dentryp))
- d_lustre_revalidate(*dentryp);
- ll_intent_release(&it);
- }
- }
-out_unplug:
- /*
- * statahead cached sa_entry can be used only once, and will be killed
- * right after use, so if lookup/revalidate accessed statahead cache,
- * set dentry ldd_sa_generation to parent lli_sa_generation, later if we
- * stat this file again, we know we've done statahead before, see
- * dentry_may_statahead().
- */
- ldd = ll_d2d(*dentryp);
- ldd->lld_sa_generation = lli->lli_sa_generation;
- sa_put(sai, entry, lli);
- return rc;
-}
-
-/**
- * start statahead thread
- *
- * \param[in] dir parent directory
- * \param[in] dentry dentry that triggers statahead, normally the first
- * dirent under @dir
- * \retval -EAGAIN on success, because when this function is
- * called, it's already in lookup call, so client should
- * do it itself instead of waiting for statahead thread
- * to do it asynchronously.
- * \retval negative number upon error
- */
-static int start_statahead_thread(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
- struct task_struct *task;
- struct dentry *parent = dentry->d_parent;
- int rc;
-
- /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */
- rc = is_first_dirent(dir, dentry);
- if (rc == LS_NOT_FIRST_DE) {
- /* It is not "ls -{a}l" operation, no need statahead for it. */
- rc = -EFAULT;
- goto out;
- }
-
- sai = ll_sai_alloc(parent);
- if (!sai) {
- rc = -ENOMEM;
- goto out;
- }
-
- sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
- /*
- * if current lli_opendir_key was deauthorized, or dir re-opened by
- * another process, don't start statahead, otherwise the newly spawned
- * statahead thread won't be notified to quit.
- */
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(lli->lli_sai || lli->lli_opendir_key ||
- lli->lli_opendir_pid != current->pid)) {
- spin_unlock(&lli->lli_sa_lock);
- rc = -EPERM;
- goto out;
- }
- lli->lli_sai = sai;
- spin_unlock(&lli->lli_sa_lock);
-
- atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_running);
-
- CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n",
- current_pid(), parent);
-
- task = kthread_create(ll_statahead_thread, parent, "ll_sa_%u",
- lli->lli_opendir_pid);
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("can't start ll_sa thread, rc : %d\n", rc);
- goto out;
- }
-
- if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED)
- ll_start_agl(parent, sai);
-
- atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_total);
- sai->sai_task = task;
-
- wake_up_process(task);
-
- /*
- * We don't stat-ahead for the first dirent since we are already in
- * lookup.
- */
- return -EAGAIN;
-
-out:
- /*
- * once we start statahead thread failed, disable statahead so
- * that subsequent stat won't waste time to try it.
- */
- spin_lock(&lli->lli_sa_lock);
- lli->lli_sa_enabled = 0;
- lli->lli_sai = NULL;
- spin_unlock(&lli->lli_sa_lock);
- if (sai)
- ll_sai_free(sai);
- return rc;
-}
-
-/**
- * statahead entry function, this is called when client getattr on a file, it
- * will start statahead thread if this is the first dir entry, else revalidate
- * dentry from statahead cache.
- *
- * \param[in] dir parent directory
- * \param[out] dentryp dentry to getattr
- * \param[in] unplug unplug statahead window only (normally for negative
- * dentry)
- * \retval 1 on success
- * \retval 0 revalidation from statahead cache failed, caller needs
- * to getattr from server directly
- * \retval negative number on error, caller often ignores this and
- * then getattr from server
- */
-int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug)
-{
- struct ll_statahead_info *sai;
-
- sai = ll_sai_get(dir);
- if (sai) {
- int rc;
-
- rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug);
- CDEBUG(D_READA, "revalidate statahead %pd: %d.\n",
- *dentryp, rc);
- ll_sai_put(sai);
- return rc;
- }
- return start_statahead_thread(dir, *dentryp);
-}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
deleted file mode 100644
index 861e7a60f408..000000000000
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <lustre_ha.h>
-#include <lustre_dlm.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <lprocfs_status.h>
-#include "llite_internal.h"
-
-static struct kmem_cache *ll_inode_cachep;
-
-static struct inode *ll_alloc_inode(struct super_block *sb)
-{
- struct ll_inode_info *lli;
-
- ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_ALLOC_INODE, 1);
- lli = kmem_cache_zalloc(ll_inode_cachep, GFP_NOFS);
- if (!lli)
- return NULL;
-
- inode_init_once(&lli->lli_vfs_inode);
- return &lli->lli_vfs_inode;
-}
-
-static void ll_inode_destroy_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct ll_inode_info *ptr = ll_i2info(inode);
-
- kmem_cache_free(ll_inode_cachep, ptr);
-}
-
-static void ll_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ll_inode_destroy_callback);
-}
-
-/* exported operations */
-struct super_operations lustre_super_operations = {
- .alloc_inode = ll_alloc_inode,
- .destroy_inode = ll_destroy_inode,
- .evict_inode = ll_delete_inode,
- .put_super = ll_put_super,
- .statfs = ll_statfs,
- .umount_begin = ll_umount_begin,
- .remount_fs = ll_remount_fs,
- .show_options = ll_show_options,
-};
-MODULE_ALIAS_FS("lustre");
-
-static int __init lustre_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) !=
- LUSTRE_VOLATILE_HDR_LEN + 1);
-
- /* print an address of _any_ initialized kernel symbol from this
- * module, to allow debugging with gdb that doesn't support data
- * symbols from modules.
- */
- CDEBUG(D_INFO, "Lustre client module (%p).\n",
- &lustre_super_operations);
-
- rc = -ENOMEM;
- ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
- sizeof(struct ll_inode_info), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
- if (!ll_inode_cachep)
- goto out_cache;
-
- ll_file_data_slab = kmem_cache_create("ll_file_data",
- sizeof(struct ll_file_data), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!ll_file_data_slab)
- goto out_cache;
-
- llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
- if (IS_ERR_OR_NULL(llite_root)) {
- rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
- llite_root = NULL;
- goto out_cache;
- }
-
- llite_kset = kset_create_and_add("llite", NULL, lustre_kobj);
- if (!llite_kset) {
- rc = -ENOMEM;
- goto out_debugfs;
- }
-
- rc = vvp_global_init();
- if (rc != 0)
- goto out_sysfs;
-
- cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
- LCT_REMEMBER | LCT_NOREF);
- if (IS_ERR(cl_inode_fini_env)) {
- rc = PTR_ERR(cl_inode_fini_env);
- goto out_vvp;
- }
-
- cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
-
- rc = ll_xattr_init();
- if (rc != 0)
- goto out_inode_fini_env;
-
- lustre_register_super_ops(THIS_MODULE, ll_fill_super, ll_kill_super);
- lustre_register_client_process_config(ll_process_config);
-
- return 0;
-
-out_inode_fini_env:
- cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
-out_vvp:
- vvp_global_fini();
-out_sysfs:
- kset_unregister(llite_kset);
-out_debugfs:
- debugfs_remove(llite_root);
-out_cache:
- kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_file_data_slab);
- return rc;
-}
-
-static void __exit lustre_exit(void)
-{
- lustre_register_super_ops(NULL, NULL, NULL);
- lustre_register_client_process_config(NULL);
-
- debugfs_remove(llite_root);
- kset_unregister(llite_kset);
-
- ll_xattr_fini();
- cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
- vvp_global_fini();
-
- kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_file_data_slab);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Client File System");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(lustre_init);
-module_exit(lustre_exit);
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
deleted file mode 100644
index 0690fdbf49f5..000000000000
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/stat.h>
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-static int ll_readlink_internal(struct inode *inode,
- struct ptlrpc_request **request, char **symname)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc, symlen = i_size_read(inode) + 1;
- struct mdt_body *body;
- struct md_op_data *op_data;
-
- *request = NULL;
-
- if (lli->lli_symlink_name) {
- int print_limit = min_t(int, PAGE_SIZE - 128, symlen);
-
- *symname = lli->lli_symlink_name;
- /* If the total CDEBUG() size is larger than a page, it
- * will print a warning to the console, avoid this by
- * printing just the last part of the symlink.
- */
- CDEBUG(D_INODE, "using cached symlink %s%.*s, len = %d\n",
- print_limit < symlen ? "..." : "", print_limit,
- (*symname) + symlen - print_limit, symlen);
- return 0;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, symlen,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_LINKNAME;
- rc = md_getattr(sbi->ll_md_exp, op_data, request);
- ll_finish_md_op_data(op_data);
- if (rc) {
- if (rc != -ENOENT)
- CERROR("%s: inode " DFID ": rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- goto failed;
- }
-
- body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
- if ((body->mbo_valid & OBD_MD_LINKNAME) == 0) {
- CERROR("OBD_MD_LINKNAME not set on reply\n");
- rc = -EPROTO;
- goto failed;
- }
-
- LASSERT(symlen != 0);
- if (body->mbo_eadatasize != symlen) {
- CERROR("%s: inode " DFID ": symlink length %d not expected %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), body->mbo_eadatasize - 1,
- symlen - 1);
- rc = -EPROTO;
- goto failed;
- }
-
- *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
- if (!*symname ||
- strnlen(*symname, symlen) != symlen - 1) {
- /* not full/NULL terminated */
- CERROR("inode %lu: symlink not NULL terminated string of length %d\n",
- inode->i_ino, symlen - 1);
- rc = -EPROTO;
- goto failed;
- }
-
- lli->lli_symlink_name = kzalloc(symlen, GFP_NOFS);
- /* do not return an error if we cannot cache the symlink locally */
- if (lli->lli_symlink_name) {
- memcpy(lli->lli_symlink_name, *symname, symlen);
- *symname = lli->lli_symlink_name;
- }
- return 0;
-
-failed:
- return rc;
-}
-
-static void ll_put_link(void *p)
-{
- ptlrpc_req_finished(p);
-}
-
-static const char *ll_get_link(struct dentry *dentry,
- struct inode *inode,
- struct delayed_call *done)
-{
- struct ptlrpc_request *request = NULL;
- int rc;
- char *symname = NULL;
-
- if (!dentry)
- return ERR_PTR(-ECHILD);
-
- CDEBUG(D_VFSTRACE, "VFS Op\n");
- ll_inode_size_lock(inode);
- rc = ll_readlink_internal(inode, &request, &symname);
- ll_inode_size_unlock(inode);
- if (rc) {
- ptlrpc_req_finished(request);
- return ERR_PTR(rc);
- }
-
- /* symname may contain a pointer to the request message buffer,
- * we delay request releasing then.
- */
- set_delayed_call(done, ll_put_link, request);
- return symname;
-}
-
-const struct inode_operations ll_fast_symlink_inode_operations = {
- .setattr = ll_setattr,
- .get_link = ll_get_link,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
-};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
deleted file mode 100644
index 987c03b058e6..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ /dev/null
@@ -1,659 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl_device and cl_device_type implementation for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd.h>
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-/*
- * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
- * "llite_" (var. "ll_") prefix.
- */
-
-static struct kmem_cache *ll_thread_kmem;
-struct kmem_cache *vvp_lock_kmem;
-struct kmem_cache *vvp_object_kmem;
-static struct kmem_cache *vvp_session_kmem;
-static struct kmem_cache *vvp_thread_kmem;
-
-static struct lu_kmem_descr vvp_caches[] = {
- {
- .ckd_cache = &ll_thread_kmem,
- .ckd_name = "ll_thread_kmem",
- .ckd_size = sizeof(struct ll_thread_info),
- },
- {
- .ckd_cache = &vvp_lock_kmem,
- .ckd_name = "vvp_lock_kmem",
- .ckd_size = sizeof(struct vvp_lock),
- },
- {
- .ckd_cache = &vvp_object_kmem,
- .ckd_name = "vvp_object_kmem",
- .ckd_size = sizeof(struct vvp_object),
- },
- {
- .ckd_cache = &vvp_session_kmem,
- .ckd_name = "vvp_session_kmem",
- .ckd_size = sizeof(struct vvp_session)
- },
- {
- .ckd_cache = &vvp_thread_kmem,
- .ckd_name = "vvp_thread_kmem",
- .ckd_size = sizeof(struct vvp_thread_info),
- },
- {
- .ckd_cache = NULL
- }
-};
-
-static void *ll_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_thread_info *info;
-
- info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void ll_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_thread_info *info = data;
-
- kmem_cache_free(ll_thread_kmem, info);
-}
-
-struct lu_context_key ll_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = ll_thread_key_init,
- .lct_fini = ll_thread_key_fini
-};
-
-static void *vvp_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_session *session;
-
- session = kmem_cache_zalloc(vvp_session_kmem, GFP_NOFS);
- if (!session)
- session = ERR_PTR(-ENOMEM);
- return session;
-}
-
-static void vvp_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_session *session = data;
-
- kmem_cache_free(vvp_session_kmem, session);
-}
-
-struct lu_context_key vvp_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = vvp_session_key_init,
- .lct_fini = vvp_session_key_fini
-};
-
-static void *vvp_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_thread_info *vti;
-
- vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
- if (!vti)
- vti = ERR_PTR(-ENOMEM);
- return vti;
-}
-
-static void vvp_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_thread_info *vti = data;
-
- kmem_cache_free(vvp_thread_kmem, vti);
-}
-
-struct lu_context_key vvp_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = vvp_thread_key_init,
- .lct_fini = vvp_thread_key_fini
-};
-
-/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
-
-static const struct lu_device_operations vvp_lu_ops = {
- .ldo_object_alloc = vvp_object_alloc
-};
-
-static struct lu_device *vvp_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct vvp_device *vdv = lu2vvp_dev(d);
- struct cl_site *site = lu2cl_site(d->ld_site);
- struct lu_device *next = cl2lu_dev(vdv->vdv_next);
-
- if (d->ld_site) {
- cl_site_fini(site);
- kfree(site);
- }
- cl_device_fini(lu2cl_dev(d));
- kfree(vdv);
- return next;
-}
-
-static struct lu_device *vvp_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct vvp_device *vdv;
- struct lu_device *lud;
- struct cl_site *site;
- int rc;
-
- vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
- if (!vdv)
- return ERR_PTR(-ENOMEM);
-
- lud = &vdv->vdv_cl.cd_lu_dev;
- cl_device_init(&vdv->vdv_cl, t);
- vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
-
- site = kzalloc(sizeof(*site), GFP_NOFS);
- if (site) {
- rc = cl_site_init(site, &vdv->vdv_cl);
- if (rc == 0) {
- rc = lu_site_init_finish(&site->cs_lu);
- } else {
- LASSERT(!lud->ld_site);
- CERROR("Cannot init lu_site, rc %d.\n", rc);
- kfree(site);
- }
- } else {
- rc = -ENOMEM;
- }
- if (rc != 0) {
- vvp_device_free(env, lud);
- lud = ERR_PTR(rc);
- }
- return lud;
-}
-
-static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- struct vvp_device *vdv;
- int rc;
-
- vdv = lu2vvp_dev(d);
- vdv->vdv_next = lu2cl_dev(next);
-
- LASSERT(d->ld_site && next->ld_type);
- next->ld_site = d->ld_site;
- rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
- next->ld_type->ldt_name,
- NULL);
- if (rc == 0) {
- lu_device_get(next);
- lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
- }
- return rc;
-}
-
-static struct lu_device *vvp_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
-}
-
-static const struct lu_device_type_operations vvp_device_type_ops = {
- .ldto_init = vvp_type_init,
- .ldto_fini = vvp_type_fini,
-
- .ldto_start = vvp_type_start,
- .ldto_stop = vvp_type_stop,
-
- .ldto_device_alloc = vvp_device_alloc,
- .ldto_device_free = vvp_device_free,
- .ldto_device_init = vvp_device_init,
- .ldto_device_fini = vvp_device_fini,
-};
-
-struct lu_device_type vvp_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_VVP_NAME,
- .ldt_ops = &vvp_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD
-};
-
-/**
- * A mutex serializing calls to vvp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-int vvp_global_init(void)
-{
- int rc;
-
- rc = lu_kmem_init(vvp_caches);
- if (rc != 0)
- return rc;
-
- rc = lu_device_type_init(&vvp_device_type);
- if (rc != 0)
- goto out_kmem;
-
- return 0;
-
-out_kmem:
- lu_kmem_fini(vvp_caches);
-
- return rc;
-}
-
-void vvp_global_fini(void)
-{
- lu_device_type_fini(&vvp_device_type);
- lu_kmem_fini(vvp_caches);
-}
-
-/*****************************************************************************
- *
- * mirror obd-devices into cl devices.
- *
- */
-
-int cl_sb_init(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
- struct cl_device *cl;
- struct lu_env *env;
- int rc = 0;
- u16 refcheck;
-
- sbi = ll_s2sbi(sb);
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- cl = cl_type_setup(env, NULL, &vvp_device_type,
- sbi->ll_dt_exp->exp_obd->obd_lu_dev);
- if (!IS_ERR(cl)) {
- sbi->ll_cl = cl;
- sbi->ll_site = cl2lu_dev(cl)->ld_site;
- }
- cl_env_put(env, &refcheck);
- } else {
- rc = PTR_ERR(env);
- }
- return rc;
-}
-
-int cl_sb_fini(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- struct cl_device *cld;
- u16 refcheck;
- int result;
-
- sbi = ll_s2sbi(sb);
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- cld = sbi->ll_cl;
-
- if (cld) {
- cl_stack_fini(env, cld);
- sbi->ll_cl = NULL;
- sbi->ll_site = NULL;
- }
- cl_env_put(env, &refcheck);
- result = 0;
- } else {
- CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
- result = PTR_ERR(env);
- }
- return result;
-}
-
-/****************************************************************************
- *
- * debugfs/lustre/llite/$MNT/dump_page_cache
- *
- ****************************************************************************/
-
-/*
- * To represent contents of a page cache as a byte stream, following
- * information if encoded in 64bit offset:
- *
- * - file hash bucket in lu_site::ls_hash[] 28bits
- *
- * - how far file is from bucket head 4bits
- *
- * - page index 32bits
- *
- * First two data identify a file in the cache uniquely.
- */
-
-#define PGC_OBJ_SHIFT (32 + 4)
-#define PGC_DEPTH_SHIFT (32)
-
-struct vvp_pgcache_id {
- unsigned int vpi_bucket;
- unsigned int vpi_depth;
- u32 vpi_index;
-
- unsigned int vpi_curdep;
- struct lu_object_header *vpi_obj;
-};
-
-static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
-{
- BUILD_BUG_ON(sizeof(pos) != sizeof(__u64));
-
- id->vpi_index = pos & 0xffffffff;
- id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf;
- id->vpi_bucket = (unsigned long long)pos >> PGC_OBJ_SHIFT;
-}
-
-static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id)
-{
- return
- ((__u64)id->vpi_index) |
- ((__u64)id->vpi_depth << PGC_DEPTH_SHIFT) |
- ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT);
-}
-
-static int vvp_pgcache_obj_get(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct vvp_pgcache_id *id = data;
- struct lu_object_header *hdr = cfs_hash_object(hs, hnode);
-
- if (id->vpi_curdep-- > 0)
- return 0; /* continue */
-
- if (lu_object_is_dying(hdr))
- return 1;
-
- cfs_hash_get(hs, hnode);
- id->vpi_obj = hdr;
- return 1;
-}
-
-static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
- struct lu_device *dev,
- struct vvp_pgcache_id *id)
-{
- LASSERT(lu_device_is_cl(dev));
-
- id->vpi_depth &= 0xf;
- id->vpi_obj = NULL;
- id->vpi_curdep = id->vpi_depth;
-
- cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket,
- vvp_pgcache_obj_get, id);
- if (id->vpi_obj) {
- struct lu_object *lu_obj;
-
- lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type);
- if (lu_obj) {
- lu_object_ref_add(lu_obj, "dump", current);
- return lu2cl(lu_obj);
- }
- lu_object_put(env, lu_object_top(id->vpi_obj));
-
- } else if (id->vpi_curdep > 0) {
- id->vpi_depth = 0xf;
- }
- return NULL;
-}
-
-static loff_t vvp_pgcache_find(const struct lu_env *env,
- struct lu_device *dev, loff_t pos)
-{
- struct cl_object *clob;
- struct lu_site *site;
- struct vvp_pgcache_id id;
-
- site = dev->ld_site;
- vvp_pgcache_id_unpack(pos, &id);
-
- while (1) {
- if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
- return ~0ULL;
- clob = vvp_pgcache_obj(env, dev, &id);
- if (clob) {
- struct inode *inode = vvp_object_inode(clob);
- struct page *vmpage;
- int nr;
-
- nr = find_get_pages_contig(inode->i_mapping,
- id.vpi_index, 1, &vmpage);
- if (nr > 0) {
- id.vpi_index = vmpage->index;
- /* Cant support over 16T file */
- nr = !(vmpage->index > 0xffffffff);
- put_page(vmpage);
- }
-
- lu_object_ref_del(&clob->co_lu, "dump", current);
- cl_object_put(env, clob);
- if (nr > 0)
- return vvp_pgcache_id_pack(&id);
- }
- /* to the next object. */
- ++id.vpi_depth;
- id.vpi_depth &= 0xf;
- if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
- return ~0ULL;
- id.vpi_index = 0;
- }
-}
-
-#define seq_page_flag(seq, page, flag, has_flags) do { \
- if (test_bit(PG_##flag, &(page)->flags)) { \
- seq_printf(seq, "%s"#flag, has_flags ? "|" : ""); \
- has_flags = 1; \
- } \
-} while (0)
-
-static void vvp_pgcache_page_show(const struct lu_env *env,
- struct seq_file *seq, struct cl_page *page)
-{
- struct vvp_page *vpg;
- struct page *vmpage;
- int has_flags;
-
- vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
- vmpage = vpg->vpg_page;
- seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [",
- 0 /* gen */,
- vpg, page,
- "none",
- vpg->vpg_defer_uptodate ? "du" : "- ",
- PageWriteback(vmpage) ? "wb" : "-",
- vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
- vmpage->mapping->host, vmpage->index,
- page_count(vmpage));
- has_flags = 0;
- seq_page_flag(seq, vmpage, locked, has_flags);
- seq_page_flag(seq, vmpage, error, has_flags);
- seq_page_flag(seq, vmpage, referenced, has_flags);
- seq_page_flag(seq, vmpage, uptodate, has_flags);
- seq_page_flag(seq, vmpage, dirty, has_flags);
- seq_page_flag(seq, vmpage, writeback, has_flags);
- seq_printf(seq, "%s]\n", has_flags ? "" : "-");
-}
-
-static int vvp_pgcache_show(struct seq_file *f, void *v)
-{
- loff_t pos;
- struct ll_sb_info *sbi;
- struct cl_object *clob;
- struct lu_env *env;
- struct vvp_pgcache_id id;
- u16 refcheck;
- int result;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- pos = *(loff_t *)v;
- vvp_pgcache_id_unpack(pos, &id);
- sbi = f->private;
- clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
- if (clob) {
- struct inode *inode = vvp_object_inode(clob);
- struct cl_page *page = NULL;
- struct page *vmpage;
-
- result = find_get_pages_contig(inode->i_mapping,
- id.vpi_index, 1,
- &vmpage);
- if (result > 0) {
- lock_page(vmpage);
- page = cl_vmpage_page(vmpage, clob);
- unlock_page(vmpage);
- put_page(vmpage);
- }
-
- seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
- PFID(lu_object_fid(&clob->co_lu)));
- if (page) {
- vvp_pgcache_page_show(env, f, page);
- cl_page_put(env, page);
- } else {
- seq_puts(f, "missing\n");
- }
- lu_object_ref_del(&clob->co_lu, "dump", current);
- cl_object_put(env, clob);
- } else {
- seq_printf(f, "%llx missing\n", pos);
- }
- cl_env_put(env, &refcheck);
- result = 0;
- } else {
- result = PTR_ERR(env);
- }
- return result;
-}
-
-static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- u16 refcheck;
-
- sbi = f->private;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- sbi = f->private;
- if (sbi->ll_site->ls_obj_hash->hs_cur_bits >
- 64 - PGC_OBJ_SHIFT) {
- pos = ERR_PTR(-EFBIG);
- } else {
- *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev,
- *pos);
- if (*pos == ~0ULL)
- pos = NULL;
- }
- cl_env_put(env, &refcheck);
- }
- return pos;
-}
-
-static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- sbi = f->private;
- *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev, *pos + 1);
- if (*pos == ~0ULL)
- pos = NULL;
- cl_env_put(env, &refcheck);
- }
- return pos;
-}
-
-static void vvp_pgcache_stop(struct seq_file *f, void *v)
-{
- /* Nothing to do */
-}
-
-static const struct seq_operations vvp_pgcache_ops = {
- .start = vvp_pgcache_start,
- .next = vvp_pgcache_next,
- .stop = vvp_pgcache_stop,
- .show = vvp_pgcache_show
-};
-
-static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
-{
- struct seq_file *seq;
- int rc;
-
- rc = seq_open(filp, &vvp_pgcache_ops);
- if (rc)
- return rc;
-
- seq = filp->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-const struct file_operations vvp_dump_pgcache_file_ops = {
- .owner = THIS_MODULE,
- .open = vvp_dump_pgcache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
deleted file mode 100644
index 02ea5161d635..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Internal definitions for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef VVP_INTERNAL_H
-#define VVP_INTERNAL_H
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <cl_object.h>
-
-enum obd_notify_event;
-struct inode;
-struct lustre_md;
-struct obd_device;
-struct obd_export;
-struct page;
-
-/**
- * IO state private to IO state private to VVP layer.
- */
-struct vvp_io {
- /** super class */
- struct cl_io_slice vui_cl;
- struct cl_io_lock_link vui_link;
- /**
- * I/O vector information to or from which read/write is going.
- */
- struct iov_iter *vui_iter;
- /**
- * Total size for the left IO.
- */
- size_t vui_tot_count;
-
- union {
- struct vvp_fault_io {
- /**
- * Inode modification time that is checked across DLM
- * lock request.
- */
- time64_t ft_mtime;
- struct vm_area_struct *ft_vma;
- /**
- * locked page returned from vvp_io
- */
- struct page *ft_vmpage;
- /**
- * kernel fault info
- */
- struct vm_fault *ft_vmf;
- /**
- * fault API used bitflags for return code.
- */
- unsigned int ft_flags;
- /**
- * check that flags are from filemap_fault
- */
- bool ft_flags_valid;
- } fault;
- struct {
- struct cl_page_list vui_queue;
- unsigned long vui_written;
- int vui_from;
- int vui_to;
- } write;
- } u;
-
- /**
- * Layout version when this IO is initialized
- */
- __u32 vui_layout_gen;
- /**
- * File descriptor against which IO is done.
- */
- struct ll_file_data *vui_fd;
- struct kiocb *vui_iocb;
-
- /* Readahead state. */
- pgoff_t vui_ra_start;
- pgoff_t vui_ra_count;
- /* Set when vui_ra_{start,count} have been initialized. */
- bool vui_ra_valid;
-};
-
-extern struct lu_device_type vvp_device_type;
-
-extern struct lu_context_key vvp_session_key;
-extern struct lu_context_key vvp_thread_key;
-
-extern struct kmem_cache *vvp_lock_kmem;
-extern struct kmem_cache *vvp_object_kmem;
-
-struct vvp_thread_info {
- struct cl_lock vti_lock;
- struct cl_lock_descr vti_descr;
- struct cl_io vti_io;
- struct cl_attr vti_attr;
-};
-
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
-{
- struct vvp_thread_info *vti;
-
- vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
- LASSERT(vti);
-
- return vti;
-}
-
-static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
-{
- struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
-
- memset(lock, 0, sizeof(*lock));
- return lock;
-}
-
-static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
-{
- struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
-
- memset(attr, 0, sizeof(*attr));
-
- return attr;
-}
-
-static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
-{
- struct cl_io *io = &vvp_env_info(env)->vti_io;
-
- memset(io, 0, sizeof(*io));
-
- return io;
-}
-
-struct vvp_session {
- struct vvp_io cs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
-{
- struct vvp_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &vvp_session_key);
- LASSERT(ses);
-
- return ses;
-}
-
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
- return &vvp_env_session(env)->cs_ios;
-}
-
-/**
- * ccc-private object state.
- */
-struct vvp_object {
- struct cl_object_header vob_header;
- struct cl_object vob_cl;
- struct inode *vob_inode;
-
- /**
- * Number of transient pages. This is no longer protected by i_sem,
- * and needs to be atomic. This is not actually used for anything,
- * and can probably be removed.
- */
- atomic_t vob_transient_pages;
-
- /**
- * Number of outstanding mmaps on this file.
- *
- * \see ll_vm_open(), ll_vm_close().
- */
- atomic_t vob_mmap_cnt;
-
- /**
- * various flags
- * vob_discard_page_warned
- * if pages belonging to this object are discarded when a client
- * is evicted, some debug info will be printed, this flag will be set
- * during processing the first discarded page, then avoid flooding
- * debug message for lots of discarded pages.
- *
- * \see ll_dirty_page_discard_warn.
- */
- unsigned int vob_discard_page_warned:1;
-};
-
-/**
- * VVP-private page state.
- */
-struct vvp_page {
- struct cl_page_slice vpg_cl;
- unsigned int vpg_defer_uptodate:1,
- vpg_ra_used:1;
- /** VM page */
- struct page *vpg_page;
-};
-
-static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
-{
- return container_of(slice, struct vvp_page, vpg_cl);
-}
-
-static inline pgoff_t vvp_index(struct vvp_page *vvp)
-{
- return vvp->vpg_cl.cpl_index;
-}
-
-struct vvp_device {
- struct cl_device vdv_cl;
- struct cl_device *vdv_next;
-};
-
-struct vvp_lock {
- struct cl_lock_slice vlk_cl;
-};
-
-void *ccc_key_init(const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-
-static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
-{
- return &vdv->vdv_cl.cd_lu_dev;
-}
-
-static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
-{
- return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev);
-}
-
-static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
-{
- return container_of0(d, struct vvp_device, vdv_cl);
-}
-
-static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
-{
- return container_of0(obj, struct vvp_object, vob_cl);
-}
-
-static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
-{
- return container_of0(obj, struct vvp_object, vob_cl.co_lu);
-}
-
-static inline struct inode *vvp_object_inode(const struct cl_object *obj)
-{
- return cl2vvp(obj)->vob_inode;
-}
-
-int vvp_object_invariant(const struct cl_object *obj);
-struct vvp_object *cl_inode2vvp(struct inode *inode);
-
-static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
-{
- return cl2vvp_page(slice)->vpg_page;
-}
-
-static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
-{
- return container_of(slice, struct vvp_lock, vlk_cl);
-}
-
-# define CLOBINVRNT(env, clob, expr) \
- ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
-struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev);
-
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
-extern const struct file_operations vvp_dump_pgcache_file_ops;
-
-#endif /* VVP_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
deleted file mode 100644
index e7a4778e02e4..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ /dev/null
@@ -1,1374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_io for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct vvp_io *vio;
-
- vio = container_of(slice, struct vvp_io, vui_cl);
- LASSERT(vio == vvp_env_io(env));
-
- return vio;
-}
-
-/**
- * For swapping layout. The file's layout may have changed.
- * To avoid populating pages to a wrong stripe, we have to verify the
- * correctness of layout. It works because swapping layout processes
- * have to acquire group lock.
- */
-static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
- struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct vvp_io *vio = vvp_env_io(env);
- bool rc = true;
-
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- /* don't need lock here to check lli_layout_gen as we have held
- * extent lock and GROUP lock has to hold to swap layout
- */
- if (ll_layout_version_get(lli) != vio->vui_layout_gen ||
- OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_LOST_LAYOUT, 0)) {
- io->ci_need_restart = 1;
- /* this will cause a short read/write */
- io->ci_continue = 0;
- rc = false;
- }
- case CIT_FAULT:
- /* fault is okay because we've already had a page. */
- default:
- break;
- }
-
- return rc;
-}
-
-static void vvp_object_size_lock(struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- ll_inode_size_lock(inode);
- cl_object_attr_lock(obj);
-}
-
-static void vvp_object_size_unlock(struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- cl_object_attr_unlock(obj);
- ll_inode_size_unlock(inode);
-}
-
-/**
- * Helper function that if necessary adjusts file size (inode->i_size), when
- * position at the offset \a pos is accessed. File size can be arbitrary stale
- * on a Lustre client, but client at least knows KMS. If accessed area is
- * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
- *
- * Locking: cl_isize_lock is used to serialize changes to inode size and to
- * protect consistency between inode size and cl_object
- * attributes. cl_object_size_lock() protects consistency between cl_attr's of
- * top-object and sub-objects.
- */
-static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, loff_t start, size_t count,
- int *exceed)
-{
- struct cl_attr *attr = vvp_env_thread_attr(env);
- struct inode *inode = vvp_object_inode(obj);
- loff_t pos = start + count - 1;
- loff_t kms;
- int result;
-
- /*
- * Consistency guarantees: following possibilities exist for the
- * relation between region being accessed and real file size at this
- * moment:
- *
- * (A): the region is completely inside of the file;
- *
- * (B-x): x bytes of region are inside of the file, the rest is
- * outside;
- *
- * (C): the region is completely outside of the file.
- *
- * This classification is stable under DLM lock already acquired by
- * the caller, because to change the class, other client has to take
- * DLM lock conflicting with our lock. Also, any updates to ->i_size
- * by other threads on this client are serialized by
- * ll_inode_size_lock(). This guarantees that short reads are handled
- * correctly in the face of concurrent writes and truncates.
- */
- vvp_object_size_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- if (result == 0) {
- kms = attr->cat_kms;
- if (pos > kms) {
- /*
- * A glimpse is necessary to determine whether we
- * return a short read (B) or some zeroes at the end
- * of the buffer (C)
- */
- vvp_object_size_unlock(obj);
- result = cl_glimpse_lock(env, io, inode, obj, 0);
- if (result == 0 && exceed) {
- /* If objective page index exceed end-of-file
- * page index, return directly. Do not expect
- * kernel will check such case correctly.
- * linux-2.6.18-128.1.1 miss to do that.
- * --bug 17336
- */
- loff_t size = i_size_read(inode);
- loff_t cur_index = start >> PAGE_SHIFT;
- loff_t size_index = (size - 1) >> PAGE_SHIFT;
-
- if ((size == 0 && cur_index != 0) ||
- size_index < cur_index)
- *exceed = 1;
- }
- return result;
- }
- /*
- * region is within kms and, hence, within real file
- * size (A). We need to increase i_size to cover the
- * read region so that generic_file_read() will do its
- * job, but that doesn't mean the kms size is
- * _correct_, it is only the _minimum_ size. If
- * someone does a stat they will get the correct size
- * which will always be >= the kms value here.
- * b=11081
- */
- if (i_size_read(inode) < kms) {
- i_size_write(inode, kms);
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(lu_object_fid(&obj->co_lu)),
- (__u64)i_size_read(inode));
- }
- }
-
- vvp_object_size_unlock(obj);
-
- return result;
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- pgoff_t start, pgoff_t end)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_lock_descr *descr = &vio->vui_link.cill_descr;
- struct cl_object *obj = io->ci_obj;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
-
- memset(&vio->vui_link, 0, sizeof(vio->vui_link));
-
- if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- descr->cld_mode = CLM_GROUP;
- descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid;
- enqflags |= CEF_LOCK_MATCH;
- } else {
- descr->cld_mode = mode;
- }
- descr->cld_obj = obj;
- descr->cld_start = start;
- descr->cld_end = end;
- descr->cld_enq_flags = enqflags;
-
- cl_io_lock_add(env, io, &vio->vui_link);
- return 0;
-}
-
-static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- loff_t start, loff_t end)
-{
- struct cl_object *obj = io->ci_obj;
-
- return vvp_io_one_lock_index(env, io, enqflags, mode,
- cl_index(obj, start), cl_index(obj, end));
-}
-
-static int vvp_io_write_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- cl_page_list_init(&vio->u.write.vui_queue);
- vio->u.write.vui_written = 0;
- vio->u.write.vui_from = 0;
- vio->u.write.vui_to = PAGE_SIZE;
-
- return 0;
-}
-
-static void vvp_io_write_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- LASSERT(vio->u.write.vui_queue.pl_nr == 0);
-}
-
-static int vvp_io_fault_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = vvp_object_inode(ios->cis_obj);
-
- LASSERT(inode == file_inode(vio->vui_fd->fd_file));
- vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
- return 0;
-}
-
-static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = vvp_object_inode(obj);
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, DFID
- " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
- PFID(lu_object_fid(&obj->co_lu)),
- io->ci_ignore_layout, io->ci_verify_layout,
- vio->vui_layout_gen, io->ci_restore_needed);
-
- if (io->ci_restore_needed) {
- int rc;
-
- /* file was detected release, we need to restore it
- * before finishing the io
- */
- rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
- /* if restore registration failed, no restart,
- * we will return -ENODATA
- */
- /* The layout will change after restore, so we need to
- * block on layout lock hold by the MDT
- * as MDT will not send new layout in lvb (see LU-3124)
- * we have to explicitly fetch it, all this will be done
- * by ll_layout_refresh()
- */
- if (rc == 0) {
- io->ci_restore_needed = 0;
- io->ci_need_restart = 1;
- io->ci_verify_layout = 1;
- } else {
- io->ci_restore_needed = 1;
- io->ci_need_restart = 0;
- io->ci_verify_layout = 0;
- io->ci_result = rc;
- }
- }
-
- if (!io->ci_ignore_layout && io->ci_verify_layout) {
- __u32 gen = 0;
-
- /* check layout version */
- ll_layout_refresh(inode, &gen);
- io->ci_need_restart = vio->vui_layout_gen != gen;
- if (io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- DFID " layout changed from %d to %d.\n",
- PFID(lu_object_fid(&obj->co_lu)),
- vio->vui_layout_gen, gen);
- /* today successful restore is the only possible case */
- /* restore was done, clear restoring state */
- clear_bit(LLIF_FILE_RESTORING,
- &ll_i2info(inode)->lli_flags);
- }
- }
-}
-
-static void vvp_io_fault_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_page *page = io->u.ci_fault.ft_page;
-
- CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
-
- if (page) {
- lu_ref_del(&page->cp_reference, "fault", io);
- cl_page_put(env, page);
- io->u.ci_fault.ft_page = NULL;
- }
- vvp_io_fini(env, ios);
-}
-
-static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
-{
- /*
- * we only want to hold PW locks if the mmap() can generate
- * writes back to the file and that only happens in shared
- * writable vmas
- */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return CLM_WRITE;
- return CLM_READ;
-}
-
-static int vvp_mmap_locks(const struct lu_env *env,
- struct vvp_io *vio, struct cl_io *io)
-{
- struct vvp_thread_info *cti = vvp_env_info(env);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- struct cl_lock_descr *descr = &cti->vti_descr;
- union ldlm_policy_data policy;
- unsigned long addr;
- ssize_t count;
- int result = 0;
- struct iov_iter i;
- struct iovec iov;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- if (!vio->vui_iter) /* nfs or loop back device write */
- return 0;
-
- /* No MM (e.g. NFS)? No vmas too. */
- if (!mm)
- return 0;
-
- iov_for_each(iov, i, *vio->vui_iter) {
- addr = (unsigned long)iov.iov_base;
- count = iov.iov_len;
- if (count == 0)
- continue;
-
- count += addr & (~PAGE_MASK);
- addr &= PAGE_MASK;
-
- down_read(&mm->mmap_sem);
- while ((vma = our_vma(mm, addr, count)) != NULL) {
- struct inode *inode = file_inode(vma->vm_file);
- int flags = CEF_MUST;
-
- if (ll_file_nolock(vma->vm_file)) {
- /*
- * For no lock case is not allowed for mmap
- */
- result = -EINVAL;
- break;
- }
-
- /*
- * XXX: Required lock mode can be weakened: CIT_WRITE
- * io only ever reads user level buffer, and CIT_READ
- * only writes on it.
- */
- policy_from_vma(&policy, vma, addr, count);
- descr->cld_mode = vvp_mode_from_vma(vma);
- descr->cld_obj = ll_i2info(inode)->lli_clob;
- descr->cld_start = cl_index(descr->cld_obj,
- policy.l_extent.start);
- descr->cld_end = cl_index(descr->cld_obj,
- policy.l_extent.end);
- descr->cld_enq_flags = flags;
- result = cl_io_lock_alloc_add(env, io, descr);
-
- CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
- descr->cld_mode, descr->cld_start,
- descr->cld_end);
-
- if (result < 0)
- break;
-
- if (vma->vm_end - addr >= count)
- break;
-
- count -= vma->vm_end - addr;
- addr = vma->vm_end;
- }
- up_read(&mm->mmap_sem);
- if (result < 0)
- break;
- }
- return result;
-}
-
-static void vvp_io_advance(const struct lu_env *env,
- const struct cl_io_slice *ios,
- size_t nob)
-{
- struct cl_object *obj = ios->cis_io->ci_obj;
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vio->vui_tot_count -= nob;
- iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count);
-}
-
-static void vvp_io_update_iov(const struct lu_env *env,
- struct vvp_io *vio, struct cl_io *io)
-{
- size_t size = io->u.ci_rw.crw_count;
-
- if (!vio->vui_iter)
- return;
-
- iov_iter_truncate(vio->vui_iter, size);
-}
-
-static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
- enum cl_lock_mode mode, loff_t start, loff_t end)
-{
- struct vvp_io *vio = vvp_env_io(env);
- int result;
- int ast_flags = 0;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- vvp_io_update_iov(env, vio, io);
-
- if (io->u.ci_rw.crw_nonblock)
- ast_flags |= CEF_NONBLOCK;
- result = vvp_mmap_locks(env, vio, io);
- if (result == 0)
- result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
- return result;
-}
-
-static int vvp_io_read_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_io_rw_common *rd = &io->u.ci_rd.rd;
- int result;
-
- result = vvp_io_rw_lock(env, io, CLM_READ, rd->crw_pos,
- rd->crw_pos + rd->crw_count - 1);
-
- return result;
-}
-
-static int vvp_io_fault_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct vvp_io *vio = cl2vvp_io(env, ios);
- /*
- * XXX LDLM_FL_CBPENDING
- */
- return vvp_io_one_lock_index(env,
- io, 0,
- vvp_mode_from_vma(vio->u.fault.ft_vma),
- io->u.ci_fault.ft_index,
- io->u.ci_fault.ft_index);
-}
-
-static int vvp_io_write_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- loff_t start;
- loff_t end;
-
- if (io->u.ci_wr.wr_append) {
- start = 0;
- end = OBD_OBJECT_EOF;
- } else {
- start = io->u.ci_wr.wr.crw_pos;
- end = start + io->u.ci_wr.wr.crw_count - 1;
- }
- return vvp_io_rw_lock(env, io, CLM_WRITE, start, end);
-}
-
-static int vvp_io_setattr_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- return 0;
-}
-
-/**
- * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io.
- *
- * Handles "lockless io" mode when extent locking is done by server.
- */
-static int vvp_io_setattr_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- __u64 new_size;
- __u32 enqflags = 0;
-
- if (cl_io_is_trunc(io)) {
- new_size = io->u.ci_setattr.sa_attr.lvb_size;
- if (new_size == 0)
- enqflags = CEF_DISCARD_DATA;
- } else {
- unsigned int valid = io->u.ci_setattr.sa_valid;
-
- if (!(valid & TIMES_SET_FLAGS))
- return 0;
-
- if ((!(valid & ATTR_MTIME) ||
- io->u.ci_setattr.sa_attr.lvb_mtime >=
- io->u.ci_setattr.sa_attr.lvb_ctime) &&
- (!(valid & ATTR_ATIME) ||
- io->u.ci_setattr.sa_attr.lvb_atime >=
- io->u.ci_setattr.sa_attr.lvb_ctime))
- return 0;
- new_size = 0;
- }
-
- return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
- new_size, OBD_OBJECT_EOF);
-}
-
-static int vvp_do_vmtruncate(struct inode *inode, size_t size)
-{
- int result;
- /*
- * Only ll_inode_size_lock is taken at this level.
- */
- ll_inode_size_lock(inode);
- result = inode_newsize_ok(inode, size);
- if (result < 0) {
- ll_inode_size_unlock(inode);
- return result;
- }
- truncate_setsize(inode, size);
- ll_inode_size_unlock(inode);
- return result;
-}
-
-static int vvp_io_setattr_time(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct cl_attr *attr = vvp_env_thread_attr(env);
- int result;
- unsigned valid = CAT_CTIME;
-
- cl_object_attr_lock(obj);
- attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime;
- if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) {
- attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime;
- valid |= CAT_ATIME;
- }
- if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) {
- attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
- valid |= CAT_MTIME;
- }
- result = cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
-
- return result;
-}
-
-static int vvp_io_setattr_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct inode *inode = vvp_object_inode(io->ci_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (cl_io_is_trunc(io)) {
- down_write(&lli->lli_trunc_sem);
- inode_lock(inode);
- inode_dio_wait(inode);
- } else {
- inode_lock(inode);
- }
-
- if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS)
- return vvp_io_setattr_time(env, ios);
-
- return 0;
-}
-
-static void vvp_io_setattr_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct inode *inode = vvp_object_inode(io->ci_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (cl_io_is_trunc(io)) {
- /* Truncate in memory pages - they must be clean pages
- * because osc has already notified to destroy osc_extents.
- */
- vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
- inode_unlock(inode);
- up_write(&lli->lli_trunc_sem);
- } else {
- inode_unlock(inode);
- }
-}
-
-static void vvp_io_setattr_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- bool restore_needed = ios->cis_io->ci_restore_needed;
- struct inode *inode = vvp_object_inode(ios->cis_obj);
-
- vvp_io_fini(env, ios);
-
- if (restore_needed && !ios->cis_io->ci_restore_needed) {
- /* restore finished, set data modified flag for HSM */
- set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
- }
-}
-
-static int vvp_io_read_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct file *file = vio->vui_fd->fd_file;
-
- int result;
- loff_t pos = io->u.ci_rd.rd.crw_pos;
- long cnt = io->u.ci_rd.rd.crw_count;
- long tot = vio->vui_tot_count;
- int exceed = 0;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
-
- down_read(&lli->lli_trunc_sem);
-
- if (!can_populate_pages(env, io, inode))
- return 0;
-
- result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
- if (result != 0)
- return result;
- if (exceed != 0)
- goto out;
-
- LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
- "Read ino %lu, %lu bytes, offset %lld, size %llu\n",
- inode->i_ino, cnt, pos, i_size_read(inode));
-
- /* turn off the kernel's read-ahead */
- vio->vui_fd->fd_file->f_ra.ra_pages = 0;
-
- /* initialize read-ahead window once per syscall */
- if (!vio->vui_ra_valid) {
- vio->vui_ra_valid = true;
- vio->vui_ra_start = cl_index(obj, pos);
- vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
- ll_ras_enter(file);
- }
-
- /* BUG: 5972 */
- file_accessed(file);
- LASSERT(vio->vui_iocb->ki_pos == pos);
- result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
-
-out:
- if (result >= 0) {
- if (result < cnt)
- io->ci_continue = 0;
- io->ci_nob += result;
- ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- vio->vui_fd, pos, result, READ);
- result = 0;
- }
- return result;
-}
-
-static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *plist, int from, int to)
-{
- struct cl_2queue *queue = &io->ci_queue;
- struct cl_page *page;
- unsigned int bytes = 0;
- int rc = 0;
-
- if (plist->pl_nr == 0)
- return 0;
-
- if (from > 0 || to != PAGE_SIZE) {
- page = cl_page_list_first(plist);
- if (plist->pl_nr == 1) {
- cl_page_clip(env, page, from, to);
- } else {
- if (from > 0)
- cl_page_clip(env, page, from, PAGE_SIZE);
- if (to != PAGE_SIZE) {
- page = cl_page_list_last(plist);
- cl_page_clip(env, page, 0, to);
- }
- }
- }
-
- cl_2queue_init(queue);
- cl_page_list_splice(plist, &queue->c2_qin);
- rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
-
- /* plist is not sorted any more */
- cl_page_list_splice(&queue->c2_qin, plist);
- cl_page_list_splice(&queue->c2_qout, plist);
- cl_2queue_fini(env, queue);
-
- if (rc == 0) {
- /* calculate bytes */
- bytes = plist->pl_nr << PAGE_SHIFT;
- bytes -= from + PAGE_SIZE - to;
-
- while (plist->pl_nr > 0) {
- page = cl_page_list_first(plist);
- cl_page_list_del(env, plist, page);
-
- cl_page_clip(env, page, 0, PAGE_SIZE);
-
- SetPageUptodate(cl_page_vmpage(page));
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- }
-
- return bytes > 0 ? bytes : rc;
-}
-
-static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct page *vmpage = page->cp_vmpage;
-
- SetPageUptodate(vmpage);
- set_page_dirty(vmpage);
-
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
- cl_page_put(env, page);
-}
-
-/* make sure the page list is contiguous */
-static bool page_list_sanity_check(struct cl_object *obj,
- struct cl_page_list *plist)
-{
- struct cl_page *page;
- pgoff_t index = CL_PAGE_EOF;
-
- cl_page_list_for_each(page, plist) {
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
-
- if (index == CL_PAGE_EOF) {
- index = vvp_index(vpg);
- continue;
- }
-
- ++index;
- if (index == vvp_index(vpg))
- continue;
-
- return false;
- }
- return true;
-}
-
-/* Return how many bytes have queued or written */
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
-{
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_page_list *queue = &vio->u.write.vui_queue;
- struct cl_page *page;
- int rc = 0;
- int bytes = 0;
- unsigned int npages = vio->u.write.vui_queue.pl_nr;
-
- if (npages == 0)
- return 0;
-
- CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
- npages, vio->u.write.vui_from, vio->u.write.vui_to);
-
- LASSERT(page_list_sanity_check(obj, queue));
-
- /* submit IO with async write */
- rc = cl_io_commit_async(env, io, queue,
- vio->u.write.vui_from, vio->u.write.vui_to,
- write_commit_callback);
- npages -= queue->pl_nr; /* already committed pages */
- if (npages > 0) {
- /* calculate how many bytes were written */
- bytes = npages << PAGE_SHIFT;
-
- /* first page */
- bytes -= vio->u.write.vui_from;
- if (queue->pl_nr == 0) /* last page */
- bytes -= PAGE_SIZE - vio->u.write.vui_to;
- LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
-
- vio->u.write.vui_written += bytes;
-
- CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
- npages, bytes, vio->u.write.vui_written);
-
- /* the first page must have been written. */
- vio->u.write.vui_from = 0;
- }
- LASSERT(page_list_sanity_check(obj, queue));
- LASSERT(ergo(rc == 0, queue->pl_nr == 0));
-
- /* out of quota, try sync write */
- if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
- rc = vvp_io_commit_sync(env, io, queue,
- vio->u.write.vui_from,
- vio->u.write.vui_to);
- if (rc > 0) {
- vio->u.write.vui_written += rc;
- rc = 0;
- }
- }
-
- /* update inode size */
- ll_merge_attr(env, inode);
-
- /* Now the pages in queue were failed to commit, discard them
- * unless they were dirtied before.
- */
- while (queue->pl_nr > 0) {
- page = cl_page_list_first(queue);
- cl_page_list_del(env, queue, page);
-
- if (!PageDirty(cl_page_vmpage(page)))
- cl_page_discard(env, io, page);
-
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- cl_page_list_fini(env, queue);
-
- return rc;
-}
-
-static int vvp_io_write_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- ssize_t result = 0;
- loff_t pos = io->u.ci_wr.wr.crw_pos;
- size_t cnt = io->u.ci_wr.wr.crw_count;
-
- down_read(&lli->lli_trunc_sem);
-
- if (!can_populate_pages(env, io, inode))
- return 0;
-
- if (cl_io_is_append(io)) {
- /*
- * PARALLEL IO This has to be changed for parallel IO doing
- * out-of-order writes.
- */
- ll_merge_attr(env, inode);
- pos = i_size_read(inode);
- io->u.ci_wr.wr.crw_pos = pos;
- vio->vui_iocb->ki_pos = pos;
- } else {
- LASSERT(vio->vui_iocb->ki_pos == pos);
- }
-
- CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
-
- /*
- * The maximum Lustre file size is variable, based on the OST maximum
- * object size and number of stripes. This needs another check in
- * addition to the VFS checks earlier.
- */
- if (pos + cnt > ll_file_maxbytes(inode)) {
- CDEBUG(D_INODE,
- "%s: file " DFID " offset %llu > maxbytes %llu\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), pos + cnt,
- ll_file_maxbytes(inode));
- return -EFBIG;
- }
-
- if (!vio->vui_iter) {
- /* from a temp io in ll_cl_init(). */
- result = 0;
- } else {
- /*
- * When using the locked AIO function (generic_file_aio_write())
- * testing has shown the inode mutex to be a limiting factor
- * with multi-threaded single shared file performance. To get
- * around this, we now use the lockless version. To maintain
- * consistency, proper locking to protect against writes,
- * trucates, etc. is handled in the higher layers of lustre.
- */
- bool lock_node = !IS_NOSEC(inode);
-
- if (lock_node)
- inode_lock(inode);
- result = __generic_file_write_iter(vio->vui_iocb,
- vio->vui_iter);
- if (lock_node)
- inode_unlock(inode);
-
- if (result > 0 || result == -EIOCBQUEUED)
- result = generic_write_sync(vio->vui_iocb, result);
- }
-
- if (result > 0) {
- result = vvp_io_write_commit(env, io);
- if (vio->u.write.vui_written > 0) {
- result = vio->u.write.vui_written;
- io->ci_nob += result;
-
- CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
- io->ci_nob, result);
- }
- }
- if (result > 0) {
- set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
-
- if (result < cnt)
- io->ci_continue = 0;
- ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- vio->vui_fd, pos, result, WRITE);
- result = 0;
- }
- return result;
-}
-
-static void vvp_io_rw_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct inode *inode = vvp_object_inode(ios->cis_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- up_read(&lli->lli_trunc_sem);
-}
-
-static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
-{
- struct vm_fault *vmf = cfio->ft_vmf;
-
- cfio->ft_flags = filemap_fault(vmf);
- cfio->ft_flags_valid = 1;
-
- if (vmf->page) {
- CDEBUG(D_PAGE,
- "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
- vmf->page, vmf->page->mapping, vmf->page->index,
- (long)vmf->page->flags, page_count(vmf->page),
- page_private(vmf->page), (void *)vmf->address);
- if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
- lock_page(vmf->page);
- cfio->ft_flags |= VM_FAULT_LOCKED;
- }
-
- cfio->ft_vmpage = vmf->page;
- return 0;
- }
-
- if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
- CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", (void *)vmf->address);
- return -EFAULT;
- }
-
- if (cfio->ft_flags & VM_FAULT_OOM) {
- CDEBUG(D_PAGE, "got addr %p - OOM\n", (void *)vmf->address);
- return -ENOMEM;
- }
-
- if (cfio->ft_flags & VM_FAULT_RETRY)
- return -EAGAIN;
-
- CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
- return -EINVAL;
-}
-
-static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- set_page_dirty(page->cp_vmpage);
-}
-
-static int vvp_io_fault_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_fault_io *fio = &io->u.ci_fault;
- struct vvp_fault_io *cfio = &vio->u.fault;
- loff_t offset;
- int result = 0;
- struct page *vmpage = NULL;
- struct cl_page *page;
- loff_t size;
- pgoff_t last_index;
-
- down_read(&lli->lli_trunc_sem);
-
- /* offset of the last byte on the page */
- offset = cl_offset(obj, fio->ft_index + 1) - 1;
- LASSERT(cl_index(obj, offset) == fio->ft_index);
- result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
- if (result != 0)
- return result;
-
- /* must return locked page */
- if (fio->ft_mkwrite) {
- LASSERT(cfio->ft_vmpage);
- lock_page(cfio->ft_vmpage);
- } else {
- result = vvp_io_kernel_fault(cfio);
- if (result != 0)
- return result;
- }
-
- vmpage = cfio->ft_vmpage;
- LASSERT(PageLocked(vmpage));
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
- ll_invalidate_page(vmpage);
-
- size = i_size_read(inode);
- /* Though we have already held a cl_lock upon this page, but
- * it still can be truncated locally.
- */
- if (unlikely((vmpage->mapping != inode->i_mapping) ||
- (page_offset(vmpage) > size))) {
- CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
-
- /* return +1 to stop cl_io_loop() and ll_fault() will catch
- * and retry.
- */
- result = 1;
- goto out;
- }
-
- last_index = cl_index(obj, size - 1);
-
- if (fio->ft_mkwrite) {
- /*
- * Capture the size while holding the lli_trunc_sem from above
- * we want to make sure that we complete the mkwrite action
- * while holding this lock. We need to make sure that we are
- * not past the end of the file.
- */
- if (last_index < fio->ft_index) {
- CDEBUG(D_PAGE,
- "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
- vmpage->mapping, fio->ft_index, last_index);
- /*
- * We need to return if we are
- * passed the end of the file. This will propagate
- * up the call stack to ll_page_mkwrite where
- * we will return VM_FAULT_NOPAGE. Any non-negative
- * value returned here will be silently
- * converted to 0. If the vmpage->mapping is null
- * the error code would be converted back to ENODATA
- * in ll_page_mkwrite0. Thus we return -ENODATA
- * to handle both cases
- */
- result = -ENODATA;
- goto out;
- }
- }
-
- page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- result = PTR_ERR(page);
- goto out;
- }
-
- /* if page is going to be written, we should add this page into cache
- * earlier.
- */
- if (fio->ft_mkwrite) {
- wait_on_page_writeback(vmpage);
- if (!PageDirty(vmpage)) {
- struct cl_page_list *plist = &io->ci_queue.c2_qin;
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
- int to = PAGE_SIZE;
-
- /* vvp_page_assume() calls wait_on_page_writeback(). */
- cl_page_assume(env, io, page);
-
- cl_page_list_init(plist);
- cl_page_list_add(plist, page);
-
- /* size fixup */
- if (last_index == vvp_index(vpg))
- to = size & ~PAGE_MASK;
-
- /* Do not set Dirty bit here so that in case IO is
- * started before the page is really made dirty, we
- * still have chance to detect it.
- */
- result = cl_io_commit_async(env, io, plist, 0, to,
- mkwrite_commit_callback);
- LASSERT(cl_page_is_owned(page, io));
- cl_page_list_fini(env, plist);
-
- vmpage = NULL;
- if (result < 0) {
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
-
- cl_page_put(env, page);
-
- /* we're in big trouble, what can we do now? */
- if (result == -EDQUOT)
- result = -ENOSPC;
- goto out;
- } else {
- cl_page_disown(env, io, page);
- }
- }
- }
-
- /*
- * The ft_index is only used in the case of
- * a mkwrite action. We need to check
- * our assertions are correct, since
- * we should have caught this above
- */
- LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
- if (fio->ft_index == last_index)
- /*
- * Last page is mapped partially.
- */
- fio->ft_nob = size - cl_offset(obj, fio->ft_index);
- else
- fio->ft_nob = cl_page_size(obj);
-
- lu_ref_add(&page->cp_reference, "fault", io);
- fio->ft_page = page;
-
-out:
- /* return unlocked vmpage to avoid deadlocking */
- if (vmpage)
- unlock_page(vmpage);
-
- cfio->ft_flags &= ~VM_FAULT_LOCKED;
-
- return result;
-}
-
-static void vvp_io_fault_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct inode *inode = vvp_object_inode(ios->cis_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CLOBINVRNT(env, ios->cis_io->ci_obj,
- vvp_object_invariant(ios->cis_io->ci_obj));
- up_read(&lli->lli_trunc_sem);
-}
-
-static int vvp_io_fsync_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- /* we should mark TOWRITE bit to each dirty page in radix tree to
- * verify pages have been written, but this is difficult because of
- * race.
- */
- return 0;
-}
-
-static int vvp_io_read_ahead(const struct lu_env *env,
- const struct cl_io_slice *ios,
- pgoff_t start, struct cl_read_ahead *ra)
-{
- int result = 0;
-
- if (ios->cis_io->ci_type == CIT_READ ||
- ios->cis_io->ci_type == CIT_FAULT) {
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- ra->cra_end = CL_PAGE_EOF;
- result = 1; /* no need to call down */
- }
- }
-
- return result;
-}
-
-static const struct cl_io_operations vvp_io_ops = {
- .op = {
- [CIT_READ] = {
- .cio_fini = vvp_io_fini,
- .cio_lock = vvp_io_read_lock,
- .cio_start = vvp_io_read_start,
- .cio_end = vvp_io_rw_end,
- .cio_advance = vvp_io_advance,
- },
- [CIT_WRITE] = {
- .cio_fini = vvp_io_fini,
- .cio_iter_init = vvp_io_write_iter_init,
- .cio_iter_fini = vvp_io_write_iter_fini,
- .cio_lock = vvp_io_write_lock,
- .cio_start = vvp_io_write_start,
- .cio_end = vvp_io_rw_end,
- .cio_advance = vvp_io_advance,
- },
- [CIT_SETATTR] = {
- .cio_fini = vvp_io_setattr_fini,
- .cio_iter_init = vvp_io_setattr_iter_init,
- .cio_lock = vvp_io_setattr_lock,
- .cio_start = vvp_io_setattr_start,
- .cio_end = vvp_io_setattr_end
- },
- [CIT_FAULT] = {
- .cio_fini = vvp_io_fault_fini,
- .cio_iter_init = vvp_io_fault_iter_init,
- .cio_lock = vvp_io_fault_lock,
- .cio_start = vvp_io_fault_start,
- .cio_end = vvp_io_fault_end,
- },
- [CIT_FSYNC] = {
- .cio_start = vvp_io_fsync_start,
- .cio_fini = vvp_io_fini
- },
- [CIT_MISC] = {
- .cio_fini = vvp_io_fini
- }
- },
- .cio_read_ahead = vvp_io_read_ahead,
-};
-
-int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct inode *inode = vvp_object_inode(obj);
- int result;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, DFID
- " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
- PFID(lu_object_fid(&obj->co_lu)),
- io->ci_ignore_layout, io->ci_verify_layout,
- vio->vui_layout_gen, io->ci_restore_needed);
-
- CL_IO_SLICE_CLEAN(vio, vui_cl);
- cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
- vio->vui_ra_valid = false;
- result = 0;
- if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
- size_t count;
- struct ll_inode_info *lli = ll_i2info(inode);
-
- count = io->u.ci_rw.crw_count;
- /* "If nbyte is 0, read() will return 0 and have no other
- * results." -- Single Unix Spec
- */
- if (count == 0)
- result = 1;
- else
- vio->vui_tot_count = count;
-
- /* for read/write, we store the jobid in the inode, and
- * it'll be fetched by osc when building RPC.
- *
- * it's not accurate if the file is shared by different
- * jobs.
- */
- lustre_get_jobid(lli->lli_jobid);
- } else if (io->ci_type == CIT_SETATTR) {
- if (!cl_io_is_trunc(io))
- io->ci_lockreq = CILR_MANDATORY;
- }
-
- /* Enqueue layout lock and get layout version. We need to do this
- * even for operations requiring to open file, such as read and write,
- * because it might not grant layout lock in IT_OPEN.
- */
- if (result == 0 && !io->ci_ignore_layout) {
- result = ll_layout_refresh(inode, &vio->vui_layout_gen);
- if (result == -ENOENT)
- /* If the inode on MDS has been removed, but the objects
- * on OSTs haven't been destroyed (async unlink), layout
- * fetch will return -ENOENT, we'd ignore this error
- * and continue with dirty flush. LU-3230.
- */
- result = 0;
- if (result < 0)
- CERROR("%s: refresh file layout " DFID " error %d.\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(lu_object_fid(&obj->co_lu)), result);
- }
-
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
deleted file mode 100644
index 4b6c7143bd2c..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_lock for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Vvp lock functions.
- *
- */
-
-static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
-{
- struct vvp_lock *vlk = cl2vvp_lock(slice);
-
- kmem_cache_free(vvp_lock_kmem, vlk);
-}
-
-static int vvp_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *unused, struct cl_sync_io *anchor)
-{
- CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
-
- return 0;
-}
-
-static const struct cl_lock_operations vvp_lock_ops = {
- .clo_fini = vvp_lock_fini,
- .clo_enqueue = vvp_lock_enqueue,
-};
-
-int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *unused)
-{
- struct vvp_lock *vlk;
- int result;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
- if (vlk) {
- cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
deleted file mode 100644
index 05ad3b322a29..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ /dev/null
@@ -1,305 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl_object implementation for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/libcfs/libcfs.h>
-
-#include <obd.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-int vvp_object_invariant(const struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
- lli->lli_clob == obj;
-}
-
-static int vvp_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- struct vvp_object *obj = lu2vvp(o);
- struct inode *inode = obj->vob_inode;
- struct ll_inode_info *lli;
-
- (*p)(env, cookie, "(%d %d) inode: %p ",
- atomic_read(&obj->vob_transient_pages),
- atomic_read(&obj->vob_mmap_cnt), inode);
- if (inode) {
- lli = ll_i2info(inode);
- (*p)(env, cookie, "%lu/%u %o %u %d %p " DFID,
- inode->i_ino, inode->i_generation, inode->i_mode,
- inode->i_nlink, atomic_read(&inode->i_count),
- lli->lli_clob, PFID(&lli->lli_fid));
- }
- return 0;
-}
-
-static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- /*
- * lov overwrites most of these fields in
- * lov_attr_get()->...lov_merge_lvb_kms(), except when inode
- * attributes are newer.
- */
-
- attr->cat_size = i_size_read(inode);
- attr->cat_mtime = inode->i_mtime.tv_sec;
- attr->cat_atime = inode->i_atime.tv_sec;
- attr->cat_ctime = inode->i_ctime.tv_sec;
- attr->cat_blocks = inode->i_blocks;
- attr->cat_uid = from_kuid(&init_user_ns, inode->i_uid);
- attr->cat_gid = from_kgid(&init_user_ns, inode->i_gid);
- /* KMS is not known by this layer */
- return 0; /* layers below have to fill in the rest */
-}
-
-static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- if (valid & CAT_UID)
- inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
- if (valid & CAT_GID)
- inode->i_gid = make_kgid(&init_user_ns, attr->cat_gid);
- if (valid & CAT_ATIME)
- inode->i_atime.tv_sec = attr->cat_atime;
- if (valid & CAT_MTIME)
- inode->i_mtime.tv_sec = attr->cat_mtime;
- if (valid & CAT_CTIME)
- inode->i_ctime.tv_sec = attr->cat_ctime;
- if (0 && valid & CAT_SIZE)
- i_size_write(inode, attr->cat_size);
- /* not currently necessary */
- if (0 && valid & (CAT_UID | CAT_GID | CAT_SIZE))
- mark_inode_dirty(inode);
- return 0;
-}
-
-static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf)
-{
- struct ll_inode_info *lli = ll_i2info(conf->coc_inode);
-
- if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
- CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n",
- PFID(&lli->lli_fid));
-
- ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
-
- /* Clean up page mmap for this inode.
- * The reason for us to do this is that if the page has
- * already been installed into memory space, the process
- * can access it without interacting with lustre, so this
- * page may be stale due to layout change, and the process
- * will never be notified.
- * This operation is expensive but mmap processes have to pay
- * a price themselves.
- */
- unmap_mapping_range(conf->coc_inode->i_mapping,
- 0, OBD_OBJECT_EOF, 0);
- }
-
- return 0;
-}
-
-static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
- int rc;
-
- rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
- if (rc < 0) {
- CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
- PFID(lu_object_fid(&obj->co_lu)), rc);
- return rc;
- }
-
- truncate_inode_pages(inode->i_mapping, 0);
- return 0;
-}
-
-static int vvp_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- lvb->lvb_mtime = LTIME_S(inode->i_mtime);
- lvb->lvb_atime = LTIME_S(inode->i_atime);
- lvb->lvb_ctime = LTIME_S(inode->i_ctime);
- /*
- * LU-417: Add dirty pages block count lest i_blocks reports 0, some
- * "cp" or "tar" on remote node may think it's a completely sparse file
- * and skip it.
- */
- if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
- lvb->lvb_blocks = dirty_cnt(inode);
- return 0;
-}
-
-static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr)
-{
- u64 valid_flags = OBD_MD_FLTYPE;
- struct inode *inode;
- struct obdo *oa;
-
- oa = attr->cra_oa;
- inode = vvp_object_inode(obj);
-
- if (attr->cra_type == CRT_WRITE)
- valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
- OBD_MD_FLUID | OBD_MD_FLGID;
- obdo_from_inode(oa, inode, valid_flags & attr->cra_flags);
- obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
- if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
- oa->o_parent_oid++;
- memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE);
-}
-
-static const struct cl_object_operations vvp_ops = {
- .coo_page_init = vvp_page_init,
- .coo_lock_init = vvp_lock_init,
- .coo_io_init = vvp_io_init,
- .coo_attr_get = vvp_attr_get,
- .coo_attr_update = vvp_attr_update,
- .coo_conf_set = vvp_conf_set,
- .coo_prune = vvp_prune,
- .coo_glimpse = vvp_object_glimpse,
- .coo_req_attr_set = vvp_req_attr_set
-};
-
-static int vvp_object_init0(const struct lu_env *env,
- struct vvp_object *vob,
- const struct cl_object_conf *conf)
-{
- vob->vob_inode = conf->coc_inode;
- atomic_set(&vob->vob_transient_pages, 0);
- cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
- return 0;
-}
-
-static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
- struct vvp_object *vob = lu2vvp(obj);
- struct lu_object *below;
- struct lu_device *under;
- int result;
-
- under = &dev->vdv_next->cd_lu_dev;
- below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
- if (below) {
- const struct cl_object_conf *cconf;
-
- cconf = lu2cl_conf(conf);
- lu_object_add(obj, below);
- result = vvp_object_init0(env, vob, cconf);
- } else {
- result = -ENOMEM;
- }
-
- return result;
-}
-
-static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct vvp_object *vob = lu2vvp(obj);
-
- lu_object_fini(obj);
- lu_object_header_fini(obj->lo_header);
- kmem_cache_free(vvp_object_kmem, vob);
-}
-
-static const struct lu_object_operations vvp_lu_obj_ops = {
- .loo_object_init = vvp_object_init,
- .loo_object_free = vvp_object_free,
- .loo_object_print = vvp_object_print,
-};
-
-struct vvp_object *cl_inode2vvp(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct lu_object *lu;
-
- lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
- LASSERT(lu);
- return lu2vvp(lu);
-}
-
-struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev)
-{
- struct vvp_object *vob;
- struct lu_object *obj;
-
- vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
- if (vob) {
- struct cl_object_header *hdr;
-
- obj = &vob->vob_cl.co_lu;
- hdr = &vob->vob_header;
- cl_object_header_init(hdr);
- hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
-
- lu_object_init(obj, &hdr->coh_lu, dev);
- lu_object_add_top(&hdr->coh_lu, obj);
-
- vob->vob_cl.co_ops = &vvp_ops;
- obj->lo_ops = &vvp_lu_obj_ops;
- } else {
- obj = NULL;
- }
- return obj;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
deleted file mode 100644
index 6eb0565ddc22..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_page for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-flags.h>
-#include <linux/pagemap.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-
-static void vvp_page_fini_common(struct vvp_page *vpg)
-{
- struct page *vmpage = vpg->vpg_page;
-
- LASSERT(vmpage);
- put_page(vmpage);
-}
-
-static void vvp_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- /*
- * vmpage->private was already cleared when page was moved into
- * VPG_FREEING state.
- */
- LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
- vvp_page_fini_common(vpg);
-}
-
-static int vvp_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io,
- int nonblock)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- LASSERT(vmpage);
- if (nonblock) {
- if (!trylock_page(vmpage))
- return -EAGAIN;
-
- if (unlikely(PageWriteback(vmpage))) {
- unlock_page(vmpage);
- return -EAGAIN;
- }
-
- return 0;
- }
-
- lock_page(vmpage);
- wait_on_page_writeback(vmpage);
-
- return 0;
-}
-
-static void vvp_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
- wait_on_page_writeback(vmpage);
-}
-
-static void vvp_page_unassume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-}
-
-static void vvp_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- unlock_page(cl2vm_page(slice));
-}
-
-static void vvp_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct vvp_page *vpg = cl2vvp_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
- ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
-
- ll_invalidate_page(vmpage);
-}
-
-static void vvp_page_delete(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct inode *inode = vmpage->mapping->host;
- struct cl_object *obj = slice->cpl_obj;
- struct cl_page *page = slice->cpl_page;
- int refc;
-
- LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == page);
- LASSERT(inode == vvp_object_inode(obj));
-
- /* Drop the reference count held in vvp_page_init */
- refc = atomic_dec_return(&page->cp_ref);
- LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
-
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
- /*
- * Reference from vmpage to cl_page is removed, but the reference back
- * is still here. It is removed later in vvp_page_fini().
- */
-}
-
-static void vvp_page_export(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int uptodate)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
- if (uptodate)
- SetPageUptodate(vmpage);
- else
- ClearPageUptodate(vmpage);
-}
-
-static int vvp_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA;
-}
-
-static int vvp_page_prep_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* Skip the page already marked as PG_uptodate. */
- return PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0;
-}
-
-static int vvp_page_prep_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *pg = slice->cpl_page;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageDirty(vmpage));
-
- /* ll_writepage path is not a sync write, so need to set page writeback
- * flag
- */
- if (!pg->cp_sync_io)
- set_page_writeback(vmpage);
-
- return 0;
-}
-
-/**
- * Handles page transfer errors at VM level.
- *
- * This takes inode as a separate argument, because inode on which error is to
- * be set can be different from \a vmpage inode in case of direct-io.
- */
-static void vvp_vmpage_error(struct inode *inode, struct page *vmpage,
- int ioret)
-{
- struct vvp_object *obj = cl_inode2vvp(inode);
-
- if (ioret == 0) {
- ClearPageError(vmpage);
- obj->vob_discard_page_warned = 0;
- } else {
- SetPageError(vmpage);
- mapping_set_error(inode->i_mapping, ioret);
-
- if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
- obj->vob_discard_page_warned == 0) {
- obj->vob_discard_page_warned = 1;
- ll_dirty_page_discard_warn(vmpage, ioret);
- }
- }
-}
-
-static void vvp_page_completion_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
- struct cl_page *page = slice->cpl_page;
- struct inode *inode = vvp_object_inode(page->cp_obj);
-
- LASSERT(PageLocked(vmpage));
- CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
-
- if (vpg->vpg_defer_uptodate)
- ll_ra_count_put(ll_i2sbi(inode), 1);
-
- if (ioret == 0) {
- if (!vpg->vpg_defer_uptodate)
- cl_page_export(env, page, 1);
- } else {
- vpg->vpg_defer_uptodate = 0;
- }
-
- if (!page->cp_sync_io)
- unlock_page(vmpage);
-}
-
-static void vvp_page_completion_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = vpg->vpg_page;
-
- CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
-
- if (pg->cp_sync_io) {
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
- } else {
- LASSERT(PageWriteback(vmpage));
- /*
- * Only mark the page error only when it's an async write
- * because applications won't wait for IO to finish.
- */
- vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
-
- end_page_writeback(vmpage);
- }
-}
-
-/**
- * Implements cl_page_operations::cpo_make_ready() method.
- *
- * This is called to yank a page from the transfer cache and to send it out as
- * a part of transfer. This function try-locks the page. If try-lock failed,
- * page is owned by some concurrent IO, and should be skipped (this is bad,
- * but hopefully rare situation, as it usually results in transfer being
- * shorter than possible).
- *
- * \retval 0 success, page can be placed into transfer
- *
- * \retval -EAGAIN page is either used by concurrent IO has been
- * truncated. Skip it.
- */
-static int vvp_page_make_ready(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *pg = slice->cpl_page;
- int result = 0;
-
- lock_page(vmpage);
- if (clear_page_dirty_for_io(vmpage)) {
- LASSERT(pg->cp_state == CPS_CACHED);
- /* This actually clears the dirty bit in the radix tree. */
- set_page_writeback(vmpage);
- CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
- } else if (pg->cp_state == CPS_PAGEOUT) {
- /* is it possible for osc_flush_async_page() to already
- * make it ready?
- */
- result = -EALREADY;
- } else {
- CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpecting page state %d.\n",
- pg->cp_state);
- LBUG();
- }
- unlock_page(vmpage);
- return result;
-}
-
-static int vvp_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d) vm@%p ",
- vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, vmpage);
- if (vmpage) {
- (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
- (long)vmpage->flags, page_count(vmpage),
- page_mapcount(vmpage), vmpage->private,
- vmpage->index,
- list_empty(&vmpage->lru) ? "not-" : "");
- }
-
- (*printer)(env, cookie, "\n");
-
- return 0;
-}
-
-static int vvp_page_fail(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- /*
- * Cached read?
- */
- LBUG();
-
- return 0;
-}
-
-static const struct cl_page_operations vvp_page_ops = {
- .cpo_own = vvp_page_own,
- .cpo_assume = vvp_page_assume,
- .cpo_unassume = vvp_page_unassume,
- .cpo_disown = vvp_page_disown,
- .cpo_discard = vvp_page_discard,
- .cpo_delete = vvp_page_delete,
- .cpo_export = vvp_page_export,
- .cpo_is_vmlocked = vvp_page_is_vmlocked,
- .cpo_fini = vvp_page_fini,
- .cpo_print = vvp_page_print,
- .io = {
- [CRT_READ] = {
- .cpo_prep = vvp_page_prep_read,
- .cpo_completion = vvp_page_completion_read,
- .cpo_make_ready = vvp_page_fail,
- },
- [CRT_WRITE] = {
- .cpo_prep = vvp_page_prep_write,
- .cpo_completion = vvp_page_completion_write,
- .cpo_make_ready = vvp_page_make_ready,
- },
- },
-};
-
-static int vvp_transient_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* transient page should always be sent. */
- return 0;
-}
-
-static int vvp_transient_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused, int nonblock)
-{
- return 0;
-}
-
-static void vvp_transient_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_unassume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct cl_page *page = slice->cpl_page;
-
- /*
- * For transient pages, remove it from the radix tree.
- */
- cl_page_delete(env, page);
-}
-
-static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct inode *inode = vvp_object_inode(slice->cpl_obj);
- int locked;
-
- locked = !inode_trylock(inode);
- if (!locked)
- inode_unlock(inode);
- return locked ? -EBUSY : -ENODATA;
-}
-
-static void
-vvp_transient_page_completion(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
-}
-
-static void vvp_transient_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *clp = slice->cpl_page;
- struct vvp_object *clobj = cl2vvp(clp->cp_obj);
-
- vvp_page_fini_common(vpg);
- atomic_dec(&clobj->vob_transient_pages);
-}
-
-static const struct cl_page_operations vvp_transient_page_ops = {
- .cpo_own = vvp_transient_page_own,
- .cpo_assume = vvp_transient_page_assume,
- .cpo_unassume = vvp_transient_page_unassume,
- .cpo_disown = vvp_transient_page_disown,
- .cpo_discard = vvp_transient_page_discard,
- .cpo_fini = vvp_transient_page_fini,
- .cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
- .cpo_print = vvp_page_print,
- .io = {
- [CRT_READ] = {
- .cpo_prep = vvp_transient_page_prep,
- .cpo_completion = vvp_transient_page_completion,
- },
- [CRT_WRITE] = {
- .cpo_prep = vvp_transient_page_prep,
- .cpo_completion = vvp_transient_page_completion,
- }
- }
-};
-
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
- struct page *vmpage = page->cp_vmpage;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vpg->vpg_page = vmpage;
- get_page(vmpage);
-
- if (page->cp_type == CPT_CACHEABLE) {
- /* in cache, decref in vvp_page_delete */
- atomic_inc(&page->cp_ref);
- SetPagePrivate(vmpage);
- vmpage->private = (unsigned long)page;
- cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
- &vvp_page_ops);
- } else {
- struct vvp_object *clobj = cl2vvp(obj);
-
- cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
- &vvp_transient_page_ops);
- atomic_inc(&clobj->vob_transient_pages);
- }
- return 0;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
deleted file mode 100644
index 2d78432963dc..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ /dev/null
@@ -1,638 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/xattr.h>
-#include <linux/selinux.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_dlm.h>
-
-#include "llite_internal.h"
-
-const struct xattr_handler *get_xattr_type(const char *name)
-{
- int i = 0;
-
- while (ll_xattr_handlers[i]) {
- size_t len = strlen(ll_xattr_handlers[i]->prefix);
-
- if (!strncmp(ll_xattr_handlers[i]->prefix, name, len))
- return ll_xattr_handlers[i];
- i++;
- }
- return NULL;
-}
-
-static int xattr_type_filter(struct ll_sb_info *sbi,
- const struct xattr_handler *handler)
-{
- /* No handler means XATTR_OTHER_T */
- if (!handler)
- return -EOPNOTSUPP;
-
- if ((handler->flags == XATTR_ACL_ACCESS_T ||
- handler->flags == XATTR_ACL_DEFAULT_T) &&
- !(sbi->ll_flags & LL_SBI_ACL))
- return -EOPNOTSUPP;
-
- if (handler->flags == XATTR_USER_T &&
- !(sbi->ll_flags & LL_SBI_USER_XATTR))
- return -EOPNOTSUPP;
-
- if (handler->flags == XATTR_TRUSTED_T &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return 0;
-}
-
-static int
-ll_xattr_set_common(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size,
- int flags)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *pv = value;
- char *fullname;
- __u64 valid;
- int rc;
-
- if (flags == XATTR_REPLACE) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
- valid = OBD_MD_FLXATTRRM;
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
- valid = OBD_MD_FLXATTR;
- }
-
- rc = xattr_type_filter(sbi, handler);
- if (rc)
- return rc;
-
- if ((handler->flags == XATTR_ACL_ACCESS_T ||
- handler->flags == XATTR_ACL_DEFAULT_T) &&
- !inode_owner_or_capable(inode))
- return -EPERM;
-
- /* b10667: ignore lustre special xattr for now */
- if (!strcmp(name, "hsm") ||
- ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
- (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))))
- return 0;
-
- /* b15587: ignore security.capability xattr for now */
- if ((handler->flags == XATTR_SECURITY_T &&
- !strcmp(name, "capability")))
- return 0;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
- strcmp(name, "selinux") == 0)
- return -EOPNOTSUPP;
-
- /*FIXME: enable IMA when the conditions are ready */
- if (handler->flags == XATTR_SECURITY_T &&
- (!strcmp(name, "ima") || !strcmp(name, "evm")))
- return -EOPNOTSUPP;
-
- /*
- * In user.* namespace, only regular files and directories can have
- * extended attributes.
- */
- if (handler->flags == XATTR_USER_T) {
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return -EPERM;
- }
-
- fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name);
- if (!fullname)
- return -ENOMEM;
- rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid, fullname, pv, size, 0, flags,
- ll_i2suppgid(inode), &req);
- kfree(fullname);
- if (rc) {
- if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) {
- LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
- return rc;
- }
-
- ptlrpc_req_finished(req);
- return 0;
-}
-
-static int get_hsm_state(struct inode *inode, u32 *hus_states)
-{
- struct md_op_data *op_data;
- struct hsm_user_state *hus;
- int rc;
-
- hus = kzalloc(sizeof(*hus), GFP_NOFS);
- if (!hus)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hus);
- if (!IS_ERR(op_data)) {
- rc = obd_iocontrol(LL_IOC_HSM_STATE_GET, ll_i2mdexp(inode),
- sizeof(*op_data), op_data, NULL);
- if (!rc)
- *hus_states = hus->hus_states;
- else
- CDEBUG(D_VFSTRACE, "obd_iocontrol failed. rc = %d\n",
- rc);
-
- ll_finish_md_op_data(op_data);
- } else {
- rc = PTR_ERR(op_data);
- CDEBUG(D_VFSTRACE, "Could not prepare the opdata. rc = %d\n",
- rc);
- }
- kfree(hus);
- return rc;
-}
-
-static int ll_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size,
- int flags)
-{
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- if (!strcmp(name, "lov")) {
- struct lov_user_md *lump = (struct lov_user_md *)value;
- int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
- LPROC_LL_SETXATTR;
- int rc = 0;
-
- ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
-
- if (size != 0 && size < sizeof(struct lov_user_md))
- return -EINVAL;
-
- /*
- * It is possible to set an xattr to a "" value of zero size.
- * For this case we are going to treat it as a removal.
- */
- if (!size && lump)
- lump = NULL;
-
- /* Attributes that are saved via getxattr will always have
- * the stripe_offset as 0. Instead, the MDS should be
- * allowed to pick the starting OST index. b=17846
- */
- if (lump && lump->lmm_stripe_offset == 0)
- lump->lmm_stripe_offset = -1;
-
- /* Avoid anyone directly setting the RELEASED flag. */
- if (lump && (lump->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
- /* Only if we have a released flag check if the file
- * was indeed archived.
- */
- u32 state = HS_NONE;
-
- rc = get_hsm_state(inode, &state);
- if (rc)
- return rc;
-
- if (!(state & HS_ARCHIVED)) {
- CDEBUG(D_VFSTRACE,
- "hus_states state = %x, pattern = %x\n",
- state, lump->lmm_pattern);
- /*
- * Here the state is: real file is not
- * archived but user is requesting to set
- * the RELEASED flag so we mask off the
- * released flag from the request
- */
- lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED;
- }
- }
-
- if (lump && S_ISREG(inode->i_mode)) {
- __u64 it_flags = FMODE_WRITE;
- int lum_size;
-
- lum_size = ll_lov_user_md_size(lump);
- if (lum_size < 0 || size < lum_size)
- return 0; /* b=10667: ignore error */
-
- rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
- lump, lum_size);
- /* b=10667: rc always be 0 here for now */
- rc = 0;
- } else if (S_ISDIR(inode->i_mode)) {
- rc = ll_dir_setstripe(inode, lump, 0);
- }
-
- return rc;
-
- } else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
- return 0;
- }
-
- return ll_xattr_set_common(handler, dentry, inode, name, value, size,
- flags);
-}
-
-int
-ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
- size_t size, __u64 valid)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- struct mdt_body *body;
- void *xdata;
- int rc;
-
- if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T &&
- (type != XATTR_SECURITY_T || strcmp(name, "security.selinux"))) {
- rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
- if (rc == -EAGAIN)
- goto getxattr_nocache;
- if (rc < 0)
- goto out_xattr;
-
- /* Add "system.posix_acl_access" to the list */
- if (lli->lli_posix_acl && valid & OBD_MD_FLXATTRLS) {
- if (size == 0) {
- rc += sizeof(XATTR_NAME_ACL_ACCESS);
- } else if (size - rc >= sizeof(XATTR_NAME_ACL_ACCESS)) {
- memcpy(buffer + rc, XATTR_NAME_ACL_ACCESS,
- sizeof(XATTR_NAME_ACL_ACCESS));
- rc += sizeof(XATTR_NAME_ACL_ACCESS);
- } else {
- rc = -ERANGE;
- goto out_xattr;
- }
- }
- } else {
-getxattr_nocache:
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid, name, NULL, 0, size, 0, &req);
- if (rc < 0)
- goto out_xattr;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
-
- /* only detect the xattr size */
- if (size == 0) {
- rc = body->mbo_eadatasize;
- goto out;
- }
-
- if (size < body->mbo_eadatasize) {
- CERROR("server bug: replied size %u > %u\n",
- body->mbo_eadatasize, (int)size);
- rc = -ERANGE;
- goto out;
- }
-
- if (body->mbo_eadatasize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- /* do not need swab xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->mbo_eadatasize);
- if (!xdata) {
- rc = -EFAULT;
- goto out;
- }
-
- memcpy(buffer, xdata, body->mbo_eadatasize);
- rc = body->mbo_eadatasize;
- }
-
-out_xattr:
- if (rc == -EOPNOTSUPP && type == XATTR_USER_T) {
- LCONSOLE_INFO(
- "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), rc);
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int ll_xattr_get_common(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-#ifdef CONFIG_FS_POSIX_ACL
- struct ll_inode_info *lli = ll_i2info(inode);
-#endif
- char *fullname;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- rc = xattr_type_filter(sbi, handler);
- if (rc)
- return rc;
-
- /* b15587: ignore security.capability xattr for now */
- if ((handler->flags == XATTR_SECURITY_T && !strcmp(name, "capability")))
- return -ENODATA;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
- !strcmp(name, "selinux"))
- return -EOPNOTSUPP;
-
-#ifdef CONFIG_FS_POSIX_ACL
- /* posix acl is under protection of LOOKUP lock. when calling to this,
- * we just have path resolution to the target inode, so we have great
- * chance that cached ACL is uptodate.
- */
- if (handler->flags == XATTR_ACL_ACCESS_T) {
- struct posix_acl *acl;
-
- spin_lock(&lli->lli_lock);
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- if (!acl)
- return -ENODATA;
-
- rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
- return rc;
- }
- if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
- return -ENODATA;
-#endif
- fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name);
- if (!fullname)
- return -ENOMEM;
- rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size,
- OBD_MD_FLXATTR);
- kfree(fullname);
- return rc;
-}
-
-static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size)
-{
- ssize_t rc;
-
- if (S_ISREG(inode->i_mode)) {
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- struct cl_layout cl = {
- .cl_buf.lb_buf = buf,
- .cl_buf.lb_len = buf_size,
- };
- struct lu_env *env;
- u16 refcheck;
-
- if (!obj)
- return -ENODATA;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_object_layout_get(env, obj, &cl);
- if (rc < 0)
- goto out_env;
-
- if (!cl.cl_size) {
- rc = -ENODATA;
- goto out_env;
- }
-
- rc = cl.cl_size;
-
- if (!buf_size)
- goto out_env;
-
- LASSERT(buf && rc <= buf_size);
-
- /*
- * Do not return layout gen for getxattr() since
- * otherwise it would confuse tar --xattr by
- * recognizing layout gen as stripe offset when the
- * file is restored. See LU-2809.
- */
- ((struct lov_mds_md *)buf)->lmm_layout_gen = 0;
-out_env:
- cl_env_put(env, &refcheck);
-
- return rc;
- } else if (S_ISDIR(inode->i_mode)) {
- struct ptlrpc_request *req = NULL;
- struct lov_mds_md *lmm = NULL;
- int lmm_size = 0;
-
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmm_size,
- &req, 0);
- if (rc < 0)
- goto out_req;
-
- if (!buf_size) {
- rc = lmm_size;
- goto out_req;
- }
-
- if (buf_size < lmm_size) {
- rc = -ERANGE;
- goto out_req;
- }
-
- memcpy(buf, lmm, lmm_size);
- rc = lmm_size;
-out_req:
- if (req)
- ptlrpc_req_finished(req);
-
- return rc;
- } else {
- return -ENODATA;
- }
-}
-
-static int ll_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- if (!strcmp(name, "lov")) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- return ll_getxattr_lov(inode, buffer, size);
- }
-
- return ll_xattr_get_common(handler, dentry, inode, name, buffer, size);
-}
-
-ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct inode *inode = d_inode(dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- char *xattr_name;
- ssize_t rc, rc2;
- size_t len, rem;
-
- LASSERT(inode);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
-
- rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size,
- OBD_MD_FLXATTRLS);
- if (rc < 0)
- return rc;
- /*
- * If we're being called to get the size of the xattr list
- * (buf_size == 0) then just assume that a lustre.lov xattr
- * exists.
- */
- if (!size)
- return rc + sizeof(XATTR_LUSTRE_LOV);
-
- xattr_name = buffer;
- rem = rc;
-
- while (rem > 0) {
- len = strnlen(xattr_name, rem - 1) + 1;
- rem -= len;
- if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) {
- /* Skip OK xattr type leave it in buffer */
- xattr_name += len;
- continue;
- }
-
- /*
- * Move up remaining xattrs in buffer
- * removing the xattr that is not OK
- */
- memmove(xattr_name, xattr_name + len, rem);
- rc -= len;
- }
-
- rc2 = ll_getxattr_lov(inode, NULL, 0);
- if (rc2 == -ENODATA)
- return rc;
-
- if (rc2 < 0)
- return rc2;
-
- if (size < rc + sizeof(XATTR_LUSTRE_LOV))
- return -ERANGE;
-
- memcpy(buffer + rc, XATTR_LUSTRE_LOV, sizeof(XATTR_LUSTRE_LOV));
-
- return rc + sizeof(XATTR_LUSTRE_LOV);
-}
-
-static const struct xattr_handler ll_user_xattr_handler = {
- .prefix = XATTR_USER_PREFIX,
- .flags = XATTR_USER_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_trusted_xattr_handler = {
- .prefix = XATTR_TRUSTED_PREFIX,
- .flags = XATTR_TRUSTED_T,
- .get = ll_xattr_get,
- .set = ll_xattr_set,
-};
-
-static const struct xattr_handler ll_security_xattr_handler = {
- .prefix = XATTR_SECURITY_PREFIX,
- .flags = XATTR_SECURITY_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_acl_access_xattr_handler = {
- .prefix = XATTR_NAME_POSIX_ACL_ACCESS,
- .flags = XATTR_ACL_ACCESS_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_acl_default_xattr_handler = {
- .prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
- .flags = XATTR_ACL_DEFAULT_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_lustre_xattr_handler = {
- .prefix = XATTR_LUSTRE_PREFIX,
- .flags = XATTR_LUSTRE_T,
- .get = ll_xattr_get,
- .set = ll_xattr_set,
-};
-
-const struct xattr_handler *ll_xattr_handlers[] = {
- &ll_user_xattr_handler,
- &ll_trusted_xattr_handler,
- &ll_security_xattr_handler,
-#ifdef CONFIG_FS_POSIX_ACL
- &ll_acl_access_xattr_handler,
- &ll_acl_default_xattr_handler,
-#endif
- &ll_lustre_xattr_handler,
- NULL,
-};
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
deleted file mode 100644
index 4dc799d60a9f..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- *
- * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <obd_support.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-/* If we ever have hundreds of extended attributes, we might want to consider
- * using a hash or a tree structure instead of list for faster lookups.
- */
-struct ll_xattr_entry {
- struct list_head xe_list; /* protected with
- * lli_xattrs_list_rwsem
- */
- char *xe_name; /* xattr name, \0-terminated */
- char *xe_value; /* xattr value */
- unsigned int xe_namelen; /* strlen(xe_name) + 1 */
- unsigned int xe_vallen; /* xattr value length */
-};
-
-static struct kmem_cache *xattr_kmem;
-static struct lu_kmem_descr xattr_caches[] = {
- {
- .ckd_cache = &xattr_kmem,
- .ckd_name = "xattr_kmem",
- .ckd_size = sizeof(struct ll_xattr_entry)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-int ll_xattr_init(void)
-{
- return lu_kmem_init(xattr_caches);
-}
-
-void ll_xattr_fini(void)
-{
- lu_kmem_fini(xattr_caches);
-}
-
-/**
- * Initializes xattr cache for an inode.
- *
- * This initializes the xattr list and marks cache presence.
- */
-static void ll_xattr_cache_init(struct ll_inode_info *lli)
-{
- INIT_LIST_HEAD(&lli->lli_xattrs);
- set_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-}
-
-/**
- * This looks for a specific extended attribute.
- *
- * Find in @cache and return @xattr_name attribute in @xattr,
- * for the NULL @xattr_name return the first cached @xattr.
- *
- * \retval 0 success
- * \retval -ENODATA if not found
- */
-static int ll_xattr_cache_find(struct list_head *cache,
- const char *xattr_name,
- struct ll_xattr_entry **xattr)
-{
- struct ll_xattr_entry *entry;
-
- list_for_each_entry(entry, cache, xe_list) {
- /* xattr_name == NULL means look for any entry */
- if (!xattr_name || strcmp(xattr_name, entry->xe_name) == 0) {
- *xattr = entry;
- CDEBUG(D_CACHE, "find: [%s]=%.*s\n",
- entry->xe_name, entry->xe_vallen,
- entry->xe_value);
- return 0;
- }
- }
-
- return -ENODATA;
-}
-
-/**
- * This adds an xattr.
- *
- * Add @xattr_name attr with @xattr_val value and @xattr_val_len length,
- *
- * \retval 0 success
- * \retval -ENOMEM if no memory could be allocated for the cached attr
- * \retval -EPROTO if duplicate xattr is being added
- */
-static int ll_xattr_cache_add(struct list_head *cache,
- const char *xattr_name,
- const char *xattr_val,
- unsigned int xattr_val_len)
-{
- struct ll_xattr_entry *xattr;
-
- if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
- CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name);
- return -EPROTO;
- }
-
- xattr = kmem_cache_zalloc(xattr_kmem, GFP_NOFS);
- if (!xattr) {
- CDEBUG(D_CACHE, "failed to allocate xattr\n");
- return -ENOMEM;
- }
-
- xattr->xe_name = kstrdup(xattr_name, GFP_NOFS);
- if (!xattr->xe_name) {
- CDEBUG(D_CACHE, "failed to alloc xattr name %u\n",
- xattr->xe_namelen);
- goto err_name;
- }
- xattr->xe_value = kmemdup(xattr_val, xattr_val_len, GFP_NOFS);
- if (!xattr->xe_value)
- goto err_value;
-
- xattr->xe_vallen = xattr_val_len;
- list_add(&xattr->xe_list, cache);
-
- CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, xattr_val_len,
- xattr_val);
-
- return 0;
-err_value:
- kfree(xattr->xe_name);
-err_name:
- kmem_cache_free(xattr_kmem, xattr);
-
- return -ENOMEM;
-}
-
-/**
- * This removes an extended attribute from cache.
- *
- * Remove @xattr_name attribute from @cache.
- *
- * \retval 0 success
- * \retval -ENODATA if @xattr_name is not cached
- */
-static int ll_xattr_cache_del(struct list_head *cache,
- const char *xattr_name)
-{
- struct ll_xattr_entry *xattr;
-
- CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name);
-
- if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
- list_del(&xattr->xe_list);
- kfree(xattr->xe_name);
- kfree(xattr->xe_value);
- kmem_cache_free(xattr_kmem, xattr);
-
- return 0;
- }
-
- return -ENODATA;
-}
-
-/**
- * This iterates cached extended attributes.
- *
- * Walk over cached attributes in @cache and
- * fill in @xld_buffer or only calculate buffer
- * size if @xld_buffer is NULL.
- *
- * \retval >= 0 buffer list size
- * \retval -ENODATA if the list cannot fit @xld_size buffer
- */
-static int ll_xattr_cache_list(struct list_head *cache,
- char *xld_buffer,
- int xld_size)
-{
- struct ll_xattr_entry *xattr, *tmp;
- int xld_tail = 0;
-
- list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
- CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
- xld_buffer, xld_tail, xattr->xe_name);
-
- if (xld_buffer) {
- xld_size -= xattr->xe_namelen;
- if (xld_size < 0)
- break;
- memcpy(&xld_buffer[xld_tail],
- xattr->xe_name, xattr->xe_namelen);
- }
- xld_tail += xattr->xe_namelen;
- }
-
- if (xld_size < 0)
- return -ERANGE;
-
- return xld_tail;
-}
-
-/**
- * Check if the xattr cache is initialized (filled).
- *
- * \retval 0 @cache is not initialized
- * \retval 1 @cache is initialized
- */
-static int ll_xattr_cache_valid(struct ll_inode_info *lli)
-{
- return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-}
-
-/**
- * This finalizes the xattr cache.
- *
- * Free all xattr memory. @lli is the inode info pointer.
- *
- * \retval 0 no error occurred
- */
-static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
-{
- if (!ll_xattr_cache_valid(lli))
- return 0;
-
- while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0)
- ; /* empty loop */
-
- clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-
- return 0;
-}
-
-int ll_xattr_cache_destroy(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- down_write(&lli->lli_xattrs_list_rwsem);
- rc = ll_xattr_cache_destroy_locked(lli);
- up_write(&lli->lli_xattrs_list_rwsem);
-
- return rc;
-}
-
-/**
- * Match or enqueue a PR lock.
- *
- * Find or request an LDLM lock with xattr data.
- * Since LDLM does not provide API for atomic match_or_enqueue,
- * the function handles it with a separate enq lock.
- * If successful, the function exits with the list lock held.
- *
- * \retval 0 no error occurred
- * \retval -ENOMEM not enough memory
- */
-static int ll_xattr_find_get_lock(struct inode *inode,
- struct lookup_intent *oit,
- struct ptlrpc_request **req)
-{
- enum ldlm_mode mode;
- struct lustre_handle lockh = { 0 };
- struct md_op_data *op_data;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_IBITS,
- .ei_mode = it_to_lock_mode(oit),
- .ei_cb_bl = &ll_md_blocking_ast,
- .ei_cb_cp = &ldlm_completion_ast,
- };
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_export *exp = sbi->ll_md_exp;
- int rc;
-
- mutex_lock(&lli->lli_xattrs_enq_lock);
- /* inode may have been shrunk and recreated, so data is gone, match lock
- * only when data exists.
- */
- if (ll_xattr_cache_valid(lli)) {
- /* Try matching first. */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0,
- LCK_PR);
- if (mode != 0) {
- /* fake oit in mdc_revalidate_lock() manner */
- oit->it_lock_handle = lockh.cookie;
- oit->it_lock_mode = mode;
- goto out;
- }
- }
-
- /* Enqueue if the lock isn't cached locally. */
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- mutex_unlock(&lli->lli_xattrs_enq_lock);
- return PTR_ERR(op_data);
- }
-
- op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;
-
- rc = md_enqueue(exp, &einfo, NULL, oit, op_data, &lockh, 0);
- ll_finish_md_op_data(op_data);
-
- if (rc < 0) {
- CDEBUG(D_CACHE,
- "md_intent_lock failed with %d for fid " DFID "\n",
- rc, PFID(ll_inode2fid(inode)));
- mutex_unlock(&lli->lli_xattrs_enq_lock);
- return rc;
- }
-
- *req = oit->it_request;
-out:
- down_write(&lli->lli_xattrs_list_rwsem);
- mutex_unlock(&lli->lli_xattrs_enq_lock);
-
- return 0;
-}
-
-/**
- * Refill the xattr cache.
- *
- * Fetch and cache the whole of xattrs for @inode, acquiring
- * a read or a write xattr lock depending on operation in @oit.
- * Intent is dropped on exit unless the operation is setxattr.
- *
- * \retval 0 no error occurred
- * \retval -EPROTO network protocol error
- * \retval -ENOMEM not enough memory for the cache
- */
-static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *xdata, *xval, *xtail, *xvtail;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body;
- __u32 *xsizes;
- int rc, i;
-
- rc = ll_xattr_find_get_lock(inode, oit, &req);
- if (rc)
- goto out_no_unlock;
-
- /* Do we have the data at this point? */
- if (ll_xattr_cache_valid(lli)) {
- ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1);
- rc = 0;
- goto out_maybe_drop;
- }
-
- /* Matched but no cache? Cancelled on error by a parallel refill. */
- if (unlikely(!req)) {
- CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
- rc = -EIO;
- goto out_maybe_drop;
- }
-
- if (oit->it_status < 0) {
- CDEBUG(D_CACHE,
- "getxattr intent returned %d for fid " DFID "\n",
- oit->it_status, PFID(ll_inode2fid(inode)));
- rc = oit->it_status;
- /* xattr data is so large that we don't want to cache it */
- if (rc == -ERANGE)
- rc = -EAGAIN;
- goto out_destroy;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- CERROR("no MDT BODY in the refill xattr reply\n");
- rc = -EPROTO;
- goto out_destroy;
- }
- /* do not need swab xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->mbo_eadatasize);
- xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
- body->mbo_aclsize);
- xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
- body->mbo_max_mdsize * sizeof(__u32));
- if (!xdata || !xval || !xsizes) {
- CERROR("wrong setxattr reply\n");
- rc = -EPROTO;
- goto out_destroy;
- }
-
- xtail = xdata + body->mbo_eadatasize;
- xvtail = xval + body->mbo_aclsize;
-
- CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);
-
- ll_xattr_cache_init(lli);
-
- for (i = 0; i < body->mbo_max_mdsize; i++) {
- CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
- /* Perform consistency checks: attr names and vals in pill */
- if (!memchr(xdata, 0, xtail - xdata)) {
- CERROR("xattr protocol violation (names are broken)\n");
- rc = -EPROTO;
- } else if (xval + *xsizes > xvtail) {
- CERROR("xattr protocol violation (vals are broken)\n");
- rc = -EPROTO;
- } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) {
- rc = -ENOMEM;
- } else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) {
- /* Filter out ACL ACCESS since it's cached separately */
- CDEBUG(D_CACHE, "not caching %s\n",
- XATTR_NAME_ACL_ACCESS);
- rc = 0;
- } else if (!strcmp(xdata, "security.selinux")) {
- /* Filter out security.selinux, it is cached in slab */
- CDEBUG(D_CACHE, "not caching security.selinux\n");
- rc = 0;
- } else {
- rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
- *xsizes);
- }
- if (rc < 0) {
- ll_xattr_cache_destroy_locked(lli);
- goto out_destroy;
- }
- xdata += strlen(xdata) + 1;
- xval += *xsizes;
- xsizes++;
- }
-
- if (xdata != xtail || xval != xvtail)
- CERROR("a hole in xattr data\n");
-
- ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL);
-
- goto out_maybe_drop;
-out_maybe_drop:
-
- ll_intent_drop_lock(oit);
-
- if (rc != 0)
- up_write(&lli->lli_xattrs_list_rwsem);
-out_no_unlock:
- ptlrpc_req_finished(req);
-
- return rc;
-
-out_destroy:
- up_write(&lli->lli_xattrs_list_rwsem);
-
- ldlm_lock_decref_and_cancel((struct lustre_handle *)
- &oit->it_lock_handle,
- oit->it_lock_mode);
-
- goto out_no_unlock;
-}
-
-/**
- * Get an xattr value or list xattrs using the write-through cache.
- *
- * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or
- * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode.
- * The resulting value/list is stored in @buffer if the former
- * is not larger than @size.
- *
- * \retval 0 no error occurred
- * \retval -EPROTO network protocol error
- * \retval -ENOMEM not enough memory for the cache
- * \retval -ERANGE the buffer is not large enough
- * \retval -ENODATA no such attr or the list is empty
- */
-int ll_xattr_cache_get(struct inode *inode, const char *name, char *buffer,
- size_t size, __u64 valid)
-{
- struct lookup_intent oit = { .it_op = IT_GETXATTR };
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc = 0;
-
- LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS));
-
- down_read(&lli->lli_xattrs_list_rwsem);
- if (!ll_xattr_cache_valid(lli)) {
- up_read(&lli->lli_xattrs_list_rwsem);
- rc = ll_xattr_cache_refill(inode, &oit);
- if (rc)
- return rc;
- downgrade_write(&lli->lli_xattrs_list_rwsem);
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1);
- }
-
- if (valid & OBD_MD_FLXATTR) {
- struct ll_xattr_entry *xattr;
-
- rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr);
- if (rc == 0) {
- rc = xattr->xe_vallen;
- /* zero size means we are only requested size in rc */
- if (size != 0) {
- if (size >= xattr->xe_vallen)
- memcpy(buffer, xattr->xe_value,
- xattr->xe_vallen);
- else
- rc = -ERANGE;
- }
- }
- } else if (valid & OBD_MD_FLXATTRLS) {
- rc = ll_xattr_cache_list(&lli->lli_xattrs,
- size ? buffer : NULL, size);
- }
-
- goto out;
-out:
- up_read(&lli->lli_xattrs_list_rwsem);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr_security.c b/drivers/staging/lustre/lustre/llite/xattr_security.c
deleted file mode 100644
index 93ec07531ac7..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr_security.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright (c) 2014 Bull SAS
- * Author: Sebastien Buisson sebastien.buisson@bull.net
- */
-
-/*
- * lustre/llite/xattr_security.c
- * Handler for storing security labels as extended attributes.
- */
-
-#include <linux/types.h>
-#include <linux/security.h>
-#include <linux/selinux.h>
-#include <linux/xattr.h>
-#include "llite_internal.h"
-
-/**
- * A helper function for ll_security_inode_init_security()
- * that takes care of setting xattrs
- *
- * Get security context of @inode from @xattr_array,
- * and put it in 'security.xxx' xattr of dentry
- * stored in @fs_info.
- *
- * \retval 0 success
- * \retval -ENOMEM if no memory could be allocated for xattr name
- * \retval < 0 failure to set xattr
- */
-static int
-ll_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
-{
- struct dentry *dentry = fs_info;
- const struct xattr *xattr;
- int err = 0;
-
- for (xattr = xattr_array; xattr->name; xattr++) {
- char *full_name;
-
- full_name = kasprintf(GFP_KERNEL, "%s%s",
- XATTR_SECURITY_PREFIX, xattr->name);
- if (!full_name) {
- err = -ENOMEM;
- break;
- }
-
- err = __vfs_setxattr(dentry, inode, full_name, xattr->value,
- xattr->value_len, XATTR_CREATE);
- kfree(full_name);
- if (err < 0)
- break;
- }
- return err;
-}
-
-/**
- * Initializes security context
- *
- * Get security context of @inode in @dir,
- * and put it in 'security.xxx' xattr of @dentry.
- *
- * \retval 0 success, or SELinux is disabled
- * \retval -ENOMEM if no memory could be allocated for xattr name
- * \retval < 0 failure to get security context or set xattr
- */
-int
-ll_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir)
-{
- if (!selinux_is_enabled())
- return 0;
-
- return security_inode_init_security(inode, dir, NULL,
- &ll_initxattrs, dentry);
-}