aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/lustre/lustre/llite
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/lustre/lustre/llite')
-rw-r--r--drivers/staging/lustre/lustre/llite/Makefile11
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c300
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c1706
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c3600
-rw-r--r--drivers/staging/lustre/lustre/llite/glimpse.c206
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_cl.c293
-rw-r--r--drivers/staging/lustre/lustre/llite/lcommon_misc.c186
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h1337
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c2666
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_mmap.c478
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c375
-rw-r--r--drivers/staging/lustre/lustre/llite/lproc_llite.c1684
-rw-r--r--drivers/staging/lustre/lustre/llite/namei.c1202
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.c240
-rw-r--r--drivers/staging/lustre/lustre/llite/range_lock.h83
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c1214
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c641
-rw-r--r--drivers/staging/lustre/lustre/llite/statahead.c1577
-rw-r--r--drivers/staging/lustre/lustre/llite/super25.c185
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c159
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_dev.c659
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_internal.h321
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c1374
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_lock.c87
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_object.c305
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c523
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c638
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_cache.c523
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr_security.c96
29 files changed, 0 insertions, 22669 deletions
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
deleted file mode 100644
index 519fd747e3ad..000000000000
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include
-subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include
-
-obj-$(CONFIG_LUSTRE_FS) += lustre.o
-lustre-y := dcache.o dir.o file.o llite_lib.o llite_nfs.o \
- rw.o rw26.o namei.o symlink.o llite_mmap.o range_lock.o \
- xattr.o xattr_cache.o xattr_security.o \
- super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \
- vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \
- lproc_llite.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
deleted file mode 100644
index 11b82c639bfe..000000000000
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/quotaops.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <lustre_dlm.h>
-
-#include "llite_internal.h"
-
-static void free_dentry_data(struct rcu_head *head)
-{
- struct ll_dentry_data *lld;
-
- lld = container_of(head, struct ll_dentry_data, lld_rcu_head);
- kfree(lld);
-}
-
-/* should NOT be called with the dcache lock, see fs/dcache.c */
-static void ll_release(struct dentry *de)
-{
- struct ll_dentry_data *lld;
-
- LASSERT(de);
- lld = ll_d2d(de);
- if (lld->lld_it) {
- ll_intent_release(lld->lld_it);
- kfree(lld->lld_it);
- }
-
- de->d_fsdata = NULL;
- call_rcu(&lld->lld_rcu_head, free_dentry_data);
-}
-
-/* Compare if two dentries are the same. Don't match if the existing dentry
- * is marked invalid. Returns 1 if different, 0 if the same.
- *
- * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
- * an AST before calling d_revalidate_it(). The dentry still exists (marked
- * INVALID) so d_lookup() matches it, but we have no lock on it (so
- * lock_match() fails) and we spin around real_lookup().
- *
- * This race doesn't apply to lookups in d_alloc_parallel(), and for
- * those we want to ensure that only one dentry with a given name is
- * in ll_lookup_nd() at a time. So allow invalid dentries to match
- * while d_in_lookup(). We will be called again when the lookup
- * completes, and can give a different answer then.
- */
-static int ll_dcompare(const struct dentry *dentry,
- unsigned int len, const char *str,
- const struct qstr *name)
-{
- if (len != name->len)
- return 1;
-
- if (memcmp(str, name->name, len))
- return 1;
-
- CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n",
- name->len, name->name, dentry, dentry->d_flags,
- d_count(dentry));
-
- /* mountpoint is always valid */
- if (d_mountpoint(dentry))
- return 0;
-
- /* ensure exclusion against parallel lookup of the same name */
- if (d_in_lookup((struct dentry *)dentry))
- return 0;
-
- if (d_lustre_invalid(dentry))
- return 1;
-
- return 0;
-}
-
-/**
- * Called when last reference to a dentry is dropped and dcache wants to know
- * whether or not it should cache it:
- * - return 1 to delete the dentry immediately
- * - return 0 to cache the dentry
- * Should NOT be called with the dcache lock, see fs/dcache.c
- */
-static int ll_ddelete(const struct dentry *de)
-{
- LASSERT(de);
-
- CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
- d_lustre_invalid(de) ? "deleting" : "keeping",
- de, de, de->d_parent, d_inode(de),
- d_unhashed(de) ? "" : "hashed,",
- list_empty(&de->d_subdirs) ? "" : "subdirs");
-
- /* kernel >= 2.6.38 last refcount is decreased after this function. */
- LASSERT(d_count(de) == 1);
-
- if (d_lustre_invalid(de))
- return 1;
- return 0;
-}
-
-static int ll_d_init(struct dentry *de)
-{
- struct ll_dentry_data *lld = kzalloc(sizeof(*lld), GFP_KERNEL);
-
- if (unlikely(!lld))
- return -ENOMEM;
- lld->lld_invalid = 1;
- de->d_fsdata = lld;
- return 0;
-}
-
-void ll_intent_drop_lock(struct lookup_intent *it)
-{
- if (it->it_op && it->it_lock_mode) {
- struct lustre_handle handle;
-
- handle.cookie = it->it_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "releasing lock with cookie %#llx from it %p\n",
- handle.cookie, it);
- ldlm_lock_decref(&handle, it->it_lock_mode);
-
- /* bug 494: intent_release may be called multiple times, from
- * this thread and we don't want to double-decref this lock
- */
- it->it_lock_mode = 0;
- if (it->it_remote_lock_mode != 0) {
- handle.cookie = it->it_remote_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "releasing remote lock with cookie%#llx from it %p\n",
- handle.cookie, it);
- ldlm_lock_decref(&handle,
- it->it_remote_lock_mode);
- it->it_remote_lock_mode = 0;
- }
- }
-}
-
-void ll_intent_release(struct lookup_intent *it)
-{
- CDEBUG(D_INFO, "intent %p released\n", it);
- ll_intent_drop_lock(it);
- /* We are still holding extra reference on a request, need to free it */
- if (it_disposition(it, DISP_ENQ_OPEN_REF))
- ptlrpc_req_finished(it->it_request); /* ll_file_open */
-
- if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
- ptlrpc_req_finished(it->it_request);
-
- it->it_disposition = 0;
- it->it_request = NULL;
-}
-
-void ll_invalidate_aliases(struct inode *inode)
-{
- struct dentry *dentry;
-
- CDEBUG(D_INODE, "marking dentries for ino " DFID "(%p) invalid\n",
- PFID(ll_inode2fid(inode)), inode);
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
- CDEBUG(D_DENTRY,
- "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
- dentry, dentry, dentry->d_parent,
- d_inode(dentry), dentry->d_flags);
-
- d_lustre_invalidate(dentry, 0);
- }
- spin_unlock(&inode->i_lock);
-}
-
-int ll_revalidate_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it,
- struct inode *inode)
-{
- int rc = 0;
-
- if (!request)
- return 0;
-
- if (it_disposition(it, DISP_LOOKUP_NEG))
- return -ENOENT;
-
- rc = ll_prep_inode(&inode, request, NULL, it);
-
- return rc;
-}
-
-void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
-{
- if (it->it_lock_mode && inode) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
- ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
- }
-
- /* drop lookup or getattr locks immediately */
- if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
- /* on 2.6 there are situation when several lookups and
- * revalidations may be requested during single operation.
- * therefore, we don't release intent here -bzzz
- */
- ll_intent_drop_lock(it);
- }
-}
-
-static int ll_revalidate_dentry(struct dentry *dentry,
- unsigned int lookup_flags)
-{
- struct inode *dir = d_inode(dentry->d_parent);
-
- /* If this is intermediate component path lookup and we were able to get
- * to this dentry, then its lock has not been revoked and the
- * path component is valid.
- */
- if (lookup_flags & LOOKUP_PARENT)
- return 1;
-
- /* Symlink - always valid as long as the dentry was found */
- if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
- return 1;
-
- /*
- * VFS warns us that this is the second go around and previous
- * operation failed (most likely open|creat), so this time
- * we better talk to the server via the lookup path by name,
- * not by fid.
- */
- if (lookup_flags & LOOKUP_REVAL)
- return 0;
-
- if (!dentry_may_statahead(dir, dentry))
- return 1;
-
- if (lookup_flags & LOOKUP_RCU)
- return -ECHILD;
-
- ll_statahead(dir, &dentry, !d_inode(dentry));
- return 1;
-}
-
-/*
- * Always trust cached dentries. Update statahead window if necessary.
- */
-static int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
-{
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, flags=%u\n",
- dentry, flags);
-
- return ll_revalidate_dentry(dentry, flags);
-}
-
-const struct dentry_operations ll_d_ops = {
- .d_init = ll_d_init,
- .d_revalidate = ll_revalidate_nd,
- .d_release = ll_release,
- .d_delete = ll_ddelete,
- .d_compare = ll_dcompare,
-};
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
deleted file mode 100644
index d10d27268323..000000000000
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ /dev/null
@@ -1,1706 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/dir.c
- *
- * Directory code for lustre client.
- */
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <linux/buffer_head.h> /* for wait_on_buffer */
-#include <linux/pagevec.h>
-#include <linux/prefetch.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <obd_class.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_lib.h>
-#include <lustre_dlm.h>
-#include <lustre_fid.h>
-#include <lustre_kernelcomm.h>
-#include <lustre_swab.h>
-
-#include "llite_internal.h"
-
-/*
- * (new) readdir implementation overview.
- *
- * Original lustre readdir implementation cached exact copy of raw directory
- * pages on the client. These pages were indexed in client page cache by
- * logical offset in the directory file. This design, while very simple and
- * intuitive had some inherent problems:
- *
- * . it implies that byte offset to the directory entry serves as a
- * telldir(3)/seekdir(3) cookie, but that offset is not stable: in
- * ext3/htree directory entries may move due to splits, and more
- * importantly,
- *
- * . it is incompatible with the design of split directories for cmd3,
- * that assumes that names are distributed across nodes based on their
- * hash, and so readdir should be done in hash order.
- *
- * New readdir implementation does readdir in hash order, and uses hash of a
- * file name as a telldir/seekdir cookie. This led to number of complications:
- *
- * . hash is not unique, so it cannot be used to index cached directory
- * pages on the client (note, that it requires a whole pageful of hash
- * collided entries to cause two pages to have identical hashes);
- *
- * . hash is not unique, so it cannot, strictly speaking, be used as an
- * entry cookie. ext3/htree has the same problem and lustre implementation
- * mimics their solution: seekdir(hash) positions directory at the first
- * entry with the given hash.
- *
- * Client side.
- *
- * 0. caching
- *
- * Client caches directory pages using hash of the first entry as an index. As
- * noted above hash is not unique, so this solution doesn't work as is:
- * special processing is needed for "page hash chains" (i.e., sequences of
- * pages filled with entries all having the same hash value).
- *
- * First, such chains have to be detected. To this end, server returns to the
- * client the hash of the first entry on the page next to one returned. When
- * client detects that this hash is the same as hash of the first entry on the
- * returned page, page hash collision has to be handled. Pages in the
- * hash chain, except first one, are termed "overflow pages".
- *
- * Solution to index uniqueness problem is to not cache overflow
- * pages. Instead, when page hash collision is detected, all overflow pages
- * from emerging chain are immediately requested from the server and placed in
- * a special data structure (struct ll_dir_chain). This data structure is used
- * by ll_readdir() to process entries from overflow pages. When readdir
- * invocation finishes, overflow pages are discarded. If page hash collision
- * chain weren't completely processed, next call to readdir will again detect
- * page hash collision, again read overflow pages in, process next portion of
- * entries and again discard the pages. This is not as wasteful as it looks,
- * because, given reasonable hash, page hash collisions are extremely rare.
- *
- * 1. directory positioning
- *
- * When seekdir(hash) is called, original
- *
- *
- *
- *
- *
- *
- *
- *
- * Server.
- *
- * identification of and access to overflow pages
- *
- * page format
- *
- * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
- * a header lu_dirpage which describes the start/end hash, and whether this
- * page is empty (contains no dir entry) or hash collide with next page.
- * After client receives reply, several pages will be integrated into dir page
- * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the lu_dirpage
- * for this integrated page will be adjusted. See lmv_adjust_dirpages().
- *
- */
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
- __u64 offset)
-{
- struct md_callback cb_op;
- struct page *page;
- int rc;
-
- cb_op.md_blocking_ast = ll_md_blocking_ast;
- rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page);
- if (rc)
- return ERR_PTR(rc);
-
- return page;
-}
-
-void ll_release_page(struct inode *inode, struct page *page, bool remove)
-{
- kunmap(page);
-
- /*
- * Always remove the page for striped dir, because the page is
- * built from temporarily in LMV layer
- */
- if (inode && S_ISDIR(inode->i_mode) &&
- ll_i2info(inode)->lli_lsm_md) {
- __free_page(page);
- return;
- }
-
- if (remove) {
- lock_page(page);
- if (likely(page->mapping))
- truncate_complete_page(page->mapping, page);
- unlock_page(page);
- }
- put_page(page);
-}
-
-/**
- * return IF_* type for given lu_dirent entry.
- * IF_* flag shld be converted to particular OS file type in
- * platform llite module.
- */
-static __u16 ll_dirent_type_get(struct lu_dirent *ent)
-{
- __u16 type = 0;
- struct luda_type *lt;
- int len = 0;
-
- if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
- const unsigned int align = sizeof(struct luda_type) - 1;
-
- len = le16_to_cpu(ent->lde_namelen);
- len = (len + align) & ~align;
- lt = (void *)ent->lde_name + len;
- type = IFTODT(le16_to_cpu(lt->lt_type));
- }
- return type;
-}
-
-int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
- struct dir_context *ctx)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = *ppos;
- int is_api32 = ll_need_32bit_api(sbi);
- int is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
- struct page *page;
- bool done = false;
- int rc = 0;
-
- page = ll_get_dir_page(inode, op_data, pos);
-
- while (rc == 0 && !done) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
- __u64 hash;
- __u64 next;
-
- if (IS_ERR(page)) {
- rc = PTR_ERR(page);
- break;
- }
-
- hash = MDS_DIR_END_OFF;
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent && !done;
- ent = lu_dirent_next(ent)) {
- __u16 type;
- int namelen;
- struct lu_fid fid;
- __u64 lhash;
- __u64 ino;
-
- hash = le64_to_cpu(ent->lde_hash);
- if (hash < pos)
- /*
- * Skip until we find target hash
- * value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (namelen == 0)
- /*
- * Skip dummy record.
- */
- continue;
-
- if (is_api32 && is_hash64)
- lhash = hash >> 32;
- else
- lhash = hash;
- fid_le_to_cpu(&fid, &ent->lde_fid);
- ino = cl_fid_build_ino(&fid, is_api32);
- type = ll_dirent_type_get(ent);
- ctx->pos = lhash;
- /* For 'll_nfs_get_name_filldir()', it will try
- * to access the 'ent' through its 'lde_name',
- * so the parameter 'name' for 'ctx->actor()'
- * must be part of the 'ent'.
- */
- done = !dir_emit(ctx, ent->lde_name,
- namelen, ino, type);
- }
-
- if (done) {
- pos = hash;
- ll_release_page(inode, page, false);
- break;
- }
-
- next = le64_to_cpu(dp->ldp_hash_end);
- pos = next;
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- done = 1;
- ll_release_page(inode, page, false);
- } else {
- /*
- * Normal case: continue to the next
- * page.
- */
- ll_release_page(inode, page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- next = pos;
- page = ll_get_dir_page(inode, op_data, pos);
- }
- }
-
- ctx->pos = pos;
- return rc;
-}
-
-static int ll_readdir(struct file *filp, struct dir_context *ctx)
-{
- struct inode *inode = file_inode(filp);
- struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- __u64 pos = lfd ? lfd->lfd_pos : 0;
- int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
- int api32 = ll_need_32bit_api(sbi);
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:inode=" DFID "(%p) pos/size %lu/%llu 32bit_api %d\n",
- PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
- i_size_read(inode), api32);
-
- if (pos == MDS_DIR_END_OFF) {
- /*
- * end-of-file.
- */
- rc = 0;
- goto out;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, inode);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- if (unlikely(op_data->op_mea1)) {
- /*
- * This is only needed for striped dir to fill ..,
- * see lmv_read_page
- */
- if (file_dentry(filp)->d_parent &&
- file_dentry(filp)->d_parent->d_inode) {
- __u64 ibits = MDS_INODELOCK_UPDATE;
- struct inode *parent;
-
- parent = file_dentry(filp)->d_parent->d_inode;
- if (ll_have_md_lock(parent, &ibits, LCK_MINMODE))
- op_data->op_fid3 = *ll_inode2fid(parent);
- }
-
- /*
- * If it can not find in cache, do lookup .. on the master
- * object
- */
- if (fid_is_zero(&op_data->op_fid3)) {
- rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3);
- if (rc) {
- ll_finish_md_op_data(op_data);
- return rc;
- }
- }
- }
- op_data->op_max_pages = sbi->ll_md_brw_pages;
- ctx->pos = pos;
- rc = ll_dir_read(inode, &pos, op_data, ctx);
- pos = ctx->pos;
- if (lfd)
- lfd->lfd_pos = pos;
-
- if (pos == MDS_DIR_END_OFF) {
- if (api32)
- pos = LL_DIR_END_OFF_32BIT;
- else
- pos = LL_DIR_END_OFF;
- } else {
- if (api32 && hash64)
- pos >>= 32;
- }
- ctx->pos = pos;
- ll_finish_md_op_data(op_data);
-out:
- if (!rc)
- ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);
-
- return rc;
-}
-
-static int ll_send_mgc_param(struct obd_export *mgc, char *string)
-{
- struct mgs_send_param *msp;
- int rc = 0;
-
- msp = kzalloc(sizeof(*msp), GFP_NOFS);
- if (!msp)
- return -ENOMEM;
-
- strlcpy(msp->mgs_param, string, sizeof(msp->mgs_param));
- rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
- sizeof(struct mgs_send_param), msp, NULL);
- if (rc)
- CERROR("Failed to set parameter: %d\n", rc);
- kfree(msp);
-
- return rc;
-}
-
-/**
- * Create striped directory with specified stripe(@lump)
- *
- * param[in] parent the parent of the directory.
- * param[in] lump the specified stripes.
- * param[in] dirname the name of the directory.
- * param[in] mode the specified mode of the directory.
- *
- * retval =0 if striped directory is being created successfully.
- * <0 if the creation is failed.
- */
-static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
- const char *dirname, umode_t mode)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- struct ll_sb_info *sbi = ll_i2sbi(parent);
- struct inode *inode = NULL;
- struct dentry dentry;
- int err;
-
- if (unlikely(lump->lum_magic != LMV_USER_MAGIC))
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p) name %s stripe_offset %d, stripe_count: %u\n",
- PFID(ll_inode2fid(parent)), parent, dirname,
- (int)lump->lum_stripe_offset, lump->lum_stripe_count);
-
- if (lump->lum_stripe_count > 1 &&
- !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE))
- return -EINVAL;
-
- if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC))
- lustre_swab_lmv_user_md(lump);
-
- if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
- mode &= ~current_umask();
- mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
- op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname,
- strlen(dirname), mode, LUSTRE_OPC_MKDIR,
- lump);
- if (IS_ERR(op_data)) {
- err = PTR_ERR(op_data);
- goto err_exit;
- }
-
- op_data->op_cli_flags |= CLI_SET_MEA;
- err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode,
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, current_fsgid()),
- cfs_curproc_cap_pack(), 0, &request);
- ll_finish_md_op_data(op_data);
-
- err = ll_prep_inode(&inode, request, parent->i_sb, NULL);
- if (err)
- goto err_exit;
-
- memset(&dentry, 0, sizeof(dentry));
- dentry.d_inode = inode;
-
- err = ll_init_security(&dentry, inode, parent);
- iput(inode);
-
-err_exit:
- ptlrpc_req_finished(request);
- return err;
-}
-
-int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
- int set_default)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int rc = 0;
- struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
- struct obd_device *mgc = lsi->lsi_mgc;
- int lum_size;
-
- if (lump) {
- /*
- * This is coming from userspace, so should be in
- * local endian. But the MDS would like it in little
- * endian, so we swab it before we send it.
- */
- switch (lump->lmm_magic) {
- case LOV_USER_MAGIC_V1: {
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
- lustre_swab_lov_user_md_v1(lump);
- lum_size = sizeof(struct lov_user_md_v1);
- break;
- }
- case LOV_USER_MAGIC_V3: {
- if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
- lustre_swab_lov_user_md_v3(
- (struct lov_user_md_v3 *)lump);
- lum_size = sizeof(struct lov_user_md_v3);
- break;
- }
- case LMV_USER_MAGIC: {
- if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC))
- lustre_swab_lmv_user_md(
- (struct lmv_user_md *)lump);
- lum_size = sizeof(struct lmv_user_md);
- break;
- }
- default: {
- CDEBUG(D_IOCTL,
- "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
- lump->lmm_magic, LOV_USER_MAGIC_V1,
- LOV_USER_MAGIC_V3);
- return -EINVAL;
- }
- }
- } else {
- lum_size = sizeof(struct lov_user_md_v1);
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc)
- return rc;
-
-#if OBD_OCD_VERSION(2, 13, 53, 0) > LUSTRE_VERSION_CODE
- /*
- * 2.9 server has stored filesystem default stripe in ROOT xattr,
- * and it's stored into system config for backward compatibility.
- *
- * In the following we use the fact that LOV_USER_MAGIC_V1 and
- * LOV_USER_MAGIC_V3 have the same initial fields so we do not
- * need to make the distinction between the 2 versions
- */
- if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
- char *param = NULL;
- char *buf;
-
- param = kzalloc(MGS_PARAM_MAXLEN, GFP_NOFS);
- if (!param)
- return -ENOMEM;
-
- buf = param;
- /* Get fsname and assume devname to be -MDT0000. */
- ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN);
- strcat(buf, "-MDT0000.lov");
- buf += strlen(buf);
-
- /* Set root stripesize */
- sprintf(buf, ".stripesize=%u",
- lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
- if (rc)
- goto end;
-
- /* Set root stripecount */
- sprintf(buf, ".stripecount=%hd",
- lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
- if (rc)
- goto end;
-
- /* Set root stripeoffset */
- sprintf(buf, ".stripeoffset=%hd",
- lump ? le16_to_cpu(lump->lmm_stripe_offset) :
- (typeof(lump->lmm_stripe_offset))(-1));
- rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
-
-end:
- kfree(param);
- }
-#endif
- return rc;
-}
-
-/**
- * This function will be used to get default LOV/LMV/Default LMV
- * @valid will be used to indicate which stripe it will retrieve
- * OBD_MD_MEA LMV stripe EA
- * OBD_MD_DEFAULT_MEA Default LMV stripe EA
- * otherwise Default LOV EA.
- * Each time, it can only retrieve 1 stripe EA
- **/
-int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size,
- struct ptlrpc_request **request, u64 valid)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdt_body *body;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *req = NULL;
- int rc, lmmsize;
- struct md_op_data *op_data;
-
- rc = ll_get_max_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, lmmsize, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr failed on inode " DFID ": rc %d\n",
- PFID(ll_inode2fid(inode)), rc);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- lmmsize = body->mbo_eadatasize;
-
- if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
- lmmsize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill,
- &RMF_MDT_MD, lmmsize);
- LASSERT(lmm);
-
- /*
- * This is coming from the MDS, so is probably in
- * little endian. We convert it to host endian before
- * passing it to userspace.
- */
- /* We don't swab objects for directories */
- switch (le32_to_cpu(lmm->lmm_magic)) {
- case LOV_MAGIC_V1:
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- break;
- case LOV_MAGIC_V3:
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- break;
- case LMV_MAGIC_V1:
- if (cpu_to_le32(LMV_MAGIC) != LMV_MAGIC)
- lustre_swab_lmv_mds_md((union lmv_mds_md *)lmm);
- break;
- case LMV_USER_MAGIC:
- if (cpu_to_le32(LMV_USER_MAGIC) != LMV_USER_MAGIC)
- lustre_swab_lmv_user_md((struct lmv_user_md *)lmm);
- break;
- default:
- CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
- rc = -EPROTO;
- }
-out:
- *plmm = lmm;
- *plmm_size = lmmsize;
- *request = req;
- return rc;
-}
-
-int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid)
-{
- struct md_op_data *op_data;
- int mdt_index, rc;
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return -ENOMEM;
-
- op_data->op_flags |= MF_GET_MDT_IDX;
- op_data->op_fid1 = *fid;
- rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
- mdt_index = op_data->op_mds;
- kvfree(op_data);
- if (rc < 0)
- return rc;
-
- return mdt_index;
-}
-
-/*
- * Get MDT index for the inode.
- */
-int ll_get_mdt_idx(struct inode *inode)
-{
- return ll_get_mdt_idx_by_fid(ll_i2sbi(inode), ll_inode2fid(inode));
-}
-
-/**
- * Generic handler to do any pre-copy work.
- *
- * It sends a first hsm_progress (with extent length == 0) to coordinator as a
- * first information for it that real work has started.
- *
- * Moreover, for a ARCHIVE request, it will sample the file data version and
- * store it in \a copy.
- *
- * \return 0 on success.
- */
-static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct hsm_progress_kernel hpk;
- int rc2, rc = 0;
-
- /* Forge a hsm_progress based on data from copy. */
- hpk.hpk_fid = copy->hc_hai.hai_fid;
- hpk.hpk_cookie = copy->hc_hai.hai_cookie;
- hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset;
- hpk.hpk_extent.length = 0;
- hpk.hpk_flags = 0;
- hpk.hpk_errval = 0;
- hpk.hpk_data_version = 0;
-
- /* For archive request, we need to read the current file version. */
- if (copy->hc_hai.hai_action == HSMA_ARCHIVE) {
- struct inode *inode;
- __u64 data_version = 0;
-
- /* Get inode for this fid */
- inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
- if (IS_ERR(inode)) {
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval is >= 0 */
- hpk.hpk_errval = -PTR_ERR(inode);
- rc = PTR_ERR(inode);
- goto progress;
- }
-
- /* Read current file data version */
- rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
- iput(inode);
- if (rc != 0) {
- CDEBUG(D_HSM,
- "Could not read file data version of " DFID " (rc = %d). Archive request (%#llx) could not be done.\n",
- PFID(&copy->hc_hai.hai_fid), rc,
- copy->hc_hai.hai_cookie);
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -rc;
- goto progress;
- }
-
- /* Store in the hsm_copy for later copytool use.
- * Always modified even if no lsm.
- */
- copy->hc_data_version = data_version;
- }
-
-progress:
- /* On error, the request should be considered as completed */
- if (hpk.hpk_errval > 0)
- hpk.hpk_flags |= HP_FLAG_COMPLETED;
- rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
-
- return rc ? rc : rc2;
-}
-
-/**
- * Generic handler to do any post-copy work.
- *
- * It will send the last hsm_progress update to coordinator to inform it
- * that copy is finished and whether it was successful or not.
- *
- * Moreover,
- * - for ARCHIVE request, it will sample the file data version and compare it
- * with the version saved in ll_ioc_copy_start(). If they do not match, copy
- * will be considered as failed.
- * - for RESTORE request, it will sample the file data version and send it to
- * coordinator which is useful if the file was imported as 'released'.
- *
- * \return 0 on success.
- */
-static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct hsm_progress_kernel hpk;
- int rc2, rc = 0;
-
- /* If you modify the logic here, also check llapi_hsm_copy_end(). */
- /* Take care: copy->hc_hai.hai_action, len, gid and data are not
- * initialized if copy_end was called with copy == NULL.
- */
-
- /* Forge a hsm_progress based on data from copy. */
- hpk.hpk_fid = copy->hc_hai.hai_fid;
- hpk.hpk_cookie = copy->hc_hai.hai_cookie;
- hpk.hpk_extent = copy->hc_hai.hai_extent;
- hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED;
- hpk.hpk_errval = copy->hc_errval;
- hpk.hpk_data_version = 0;
-
- /* For archive request, we need to check the file data was not changed.
- *
- * For restore request, we need to send the file data version, this is
- * useful when the file was created using hsm_import.
- */
- if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) ||
- (copy->hc_hai.hai_action == HSMA_RESTORE)) &&
- (copy->hc_errval == 0)) {
- struct inode *inode;
- __u64 data_version = 0;
-
- /* Get lsm for this fid */
- inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
- if (IS_ERR(inode)) {
- hpk.hpk_flags |= HP_FLAG_RETRY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -PTR_ERR(inode);
- rc = PTR_ERR(inode);
- goto progress;
- }
-
- rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH);
- iput(inode);
- if (rc) {
- CDEBUG(D_HSM,
- "Could not read file data version. Request could not be confirmed.\n");
- if (hpk.hpk_errval == 0)
- hpk.hpk_errval = -rc;
- goto progress;
- }
-
- /* Store in the hsm_copy for later copytool use.
- * Always modified even if no lsm.
- */
- hpk.hpk_data_version = data_version;
-
- /* File could have been stripped during archiving, so we need
- * to check anyway.
- */
- if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
- (copy->hc_data_version != data_version)) {
- CDEBUG(D_HSM, "File data version mismatched. File content was changed during archiving. " DFID ", start:%#llx current:%#llx\n",
- PFID(&copy->hc_hai.hai_fid),
- copy->hc_data_version, data_version);
- /* File was changed, send error to cdt. Do not ask for
- * retry because if a file is modified frequently,
- * the cdt will loop on retried archive requests.
- * The policy engine will ask for a new archive later
- * when the file will not be modified for some tunable
- * time
- */
- hpk.hpk_flags &= ~HP_FLAG_RETRY;
- rc = -EBUSY;
- /* hpk_errval must be >= 0 */
- hpk.hpk_errval = -rc;
- }
- }
-
-progress:
- rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
- &hpk, NULL);
-
- return rc ? rc : rc2;
-}
-
-static int copy_and_ioctl(int cmd, struct obd_export *exp,
- const void __user *data, size_t size)
-{
- void *copy;
- int rc;
-
- copy = memdup_user(data, size);
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = obd_iocontrol(cmd, exp, size, copy, NULL);
- kfree(copy);
-
- return rc;
-}
-
-static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
-{
- int cmd = qctl->qc_cmd;
- int type = qctl->qc_type;
- int id = qctl->qc_id;
- int valid = qctl->qc_valid;
- int rc = 0;
-
- switch (cmd) {
- case Q_SETQUOTA:
- case Q_SETINFO:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case Q_GETQUOTA:
- if (((type == USRQUOTA &&
- !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
- (type == GRPQUOTA &&
- !in_egroup_p(make_kgid(&init_user_ns, id)))) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case Q_GETINFO:
- break;
- default:
- CERROR("unsupported quotactl op: %#x\n", cmd);
- return -ENOTTY;
- }
-
- if (valid != QC_GENERAL) {
- if (cmd == Q_GETINFO)
- qctl->qc_cmd = Q_GETOINFO;
- else if (cmd == Q_GETQUOTA)
- qctl->qc_cmd = Q_GETOQUOTA;
- else
- return -EINVAL;
-
- switch (valid) {
- case QC_MDTIDX:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- case QC_OSTIDX:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- case QC_UUID:
- rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
- sizeof(*qctl), qctl, NULL);
- if (rc == -EAGAIN)
- rc = obd_iocontrol(OBD_IOC_QUOTACTL,
- sbi->ll_dt_exp,
- sizeof(*qctl), qctl, NULL);
- break;
- default:
- rc = -EINVAL;
- break;
- }
-
- if (rc)
- return rc;
-
- qctl->qc_cmd = cmd;
- } else {
- struct obd_quotactl *oqctl;
-
- oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
- if (!oqctl)
- return -ENOMEM;
-
- QCTL_COPY(oqctl, qctl);
- rc = obd_quotactl(sbi->ll_md_exp, oqctl);
- if (rc) {
- kfree(oqctl);
- return rc;
- }
- /* If QIF_SPACE is not set, client should collect the
- * space usage from OSSs by itself
- */
- if (cmd == Q_GETQUOTA &&
- !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
- !oqctl->qc_dqblk.dqb_curspace) {
- struct obd_quotactl *oqctl_tmp;
-
- oqctl_tmp = kzalloc(sizeof(*oqctl_tmp), GFP_NOFS);
- if (!oqctl_tmp) {
- rc = -ENOMEM;
- goto out;
- }
-
- oqctl_tmp->qc_cmd = Q_GETOQUOTA;
- oqctl_tmp->qc_id = oqctl->qc_id;
- oqctl_tmp->qc_type = oqctl->qc_type;
-
- /* collect space usage from OSTs */
- oqctl_tmp->qc_dqblk.dqb_curspace = 0;
- rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp);
- if (!rc || rc == -EREMOTEIO) {
- oqctl->qc_dqblk.dqb_curspace =
- oqctl_tmp->qc_dqblk.dqb_curspace;
- oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
- }
-
- /* collect space & inode usage from MDTs */
- oqctl_tmp->qc_dqblk.dqb_curspace = 0;
- oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
- rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp);
- if (!rc || rc == -EREMOTEIO) {
- oqctl->qc_dqblk.dqb_curspace +=
- oqctl_tmp->qc_dqblk.dqb_curspace;
- oqctl->qc_dqblk.dqb_curinodes =
- oqctl_tmp->qc_dqblk.dqb_curinodes;
- oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
- } else {
- oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
- }
-
- kfree(oqctl_tmp);
- }
-out:
- QCTL_COPY(qctl, oqctl);
- kfree(oqctl);
- }
-
- return rc;
-}
-
-/* This function tries to get a single name component,
- * to send to the server. No actual path traversal involved,
- * so we limit to NAME_MAX
- */
-static char *ll_getname(const char __user *filename)
-{
- int ret = 0, len;
- char *tmp;
-
- tmp = kzalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!tmp)
- return ERR_PTR(-ENOMEM);
-
- len = strncpy_from_user(tmp, filename, NAME_MAX + 1);
- if (len < 0)
- ret = len;
- else if (len == 0)
- ret = -ENOENT;
- else if (len > NAME_MAX && tmp[NAME_MAX] != 0)
- ret = -ENAMETOOLONG;
-
- if (ret) {
- kfree(tmp);
- tmp = ERR_PTR(ret);
- }
- return tmp;
-}
-
-#define ll_putname(filename) kfree(filename)
-
-static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = file_inode(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_ioctl_data *data;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), cmd=%#x\n",
- PFID(ll_inode2fid(inode)), inode, cmd);
-
- /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
- if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
- return -ENOTTY;
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
- switch (cmd) {
- case FSFILT_IOC_GETFLAGS:
- case FSFILT_IOC_SETFLAGS:
- return ll_iocontrol(inode, file, cmd, arg);
- case FSFILT_IOC_GETVERSION_OLD:
- case FSFILT_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
- /* We need to special case any other ioctls we want to handle,
- * to send them to the MDS/OST as appropriate and to properly
- * network encode the arg field.
- case FSFILT_IOC_SETVERSION_OLD:
- case FSFILT_IOC_SETVERSION:
- */
- case LL_IOC_GET_MDTIDX: {
- int mdtidx;
-
- mdtidx = ll_get_mdt_idx(inode);
- if (mdtidx < 0)
- return mdtidx;
-
- if (put_user((int)mdtidx, (int __user *)arg))
- return -EFAULT;
-
- return 0;
- }
- case IOC_MDC_LOOKUP: {
- int namelen, len = 0;
- char *buf = NULL;
- char *filename;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc)
- return rc;
- data = (void *)buf;
-
- filename = data->ioc_inlbuf1;
- namelen = strlen(filename);
-
- if (namelen < 1) {
- CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
- rc = -EINVAL;
- goto out_free;
- }
-
- rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);
- if (rc < 0) {
- CERROR("%s: lookup %.*s failed: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), namelen,
- filename, rc);
- goto out_free;
- }
-out_free:
- kvfree(buf);
- return rc;
- }
- case LL_IOC_LMV_SETSTRIPE: {
- struct lmv_user_md *lum;
- char *buf = NULL;
- char *filename;
- int namelen = 0;
- int lumlen = 0;
- umode_t mode;
- int len;
- int rc;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc)
- return rc;
-
- data = (void *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) {
- rc = -EINVAL;
- goto lmv_out_free;
- }
-
- filename = data->ioc_inlbuf1;
- namelen = data->ioc_inllen1;
-
- if (namelen < 1) {
- CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
- rc = -EINVAL;
- goto lmv_out_free;
- }
- lum = (struct lmv_user_md *)data->ioc_inlbuf2;
- lumlen = data->ioc_inllen2;
-
- if (lum->lum_magic != LMV_USER_MAGIC ||
- lumlen != sizeof(*lum)) {
- CERROR("%s: wrong lum magic %x or size %d: rc = %d\n",
- filename, lum->lum_magic, lumlen, -EFAULT);
- rc = -EINVAL;
- goto lmv_out_free;
- }
-
-#if OBD_OCD_VERSION(2, 9, 50, 0) > LUSTRE_VERSION_CODE
- mode = data->ioc_type != 0 ? data->ioc_type : 0777;
-#else
- mode = data->ioc_type;
-#endif
- rc = ll_dir_setdirstripe(inode, lum, filename, mode);
-lmv_out_free:
- kvfree(buf);
- return rc;
- }
- case LL_IOC_LMV_SET_DEFAULT_STRIPE: {
- struct lmv_user_md __user *ulump;
- struct lmv_user_md lum;
- int rc;
-
- ulump = (struct lmv_user_md __user *)arg;
- if (copy_from_user(&lum, ulump, sizeof(lum)))
- return -EFAULT;
-
- if (lum.lum_magic != LMV_USER_MAGIC)
- return -EINVAL;
-
- rc = ll_dir_setstripe(inode, (struct lov_user_md *)&lum, 0);
-
- return rc;
- }
- case LL_IOC_LOV_SETSTRIPE: {
- struct lov_user_md_v3 lumv3;
- struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
- struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
- struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
-
- int set_default = 0;
-
- LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
- LASSERT(sizeof(lumv3.lmm_objects[0]) ==
- sizeof(lumv3p->lmm_objects[0]));
- /* first try with v1 which is smaller than v3 */
- if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
- return -EFAULT;
-
- if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
- if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
- return -EFAULT;
- }
-
- if (is_root_inode(inode))
- set_default = 1;
-
- /* in v1 and v3 cases lumv1 points to data */
- rc = ll_dir_setstripe(inode, lumv1, set_default);
-
- return rc;
- }
- case LL_IOC_LMV_GETSTRIPE: {
- struct lmv_user_md __user *ulmv;
- struct lmv_user_md lum;
- struct ptlrpc_request *request = NULL;
- struct lmv_user_md *tmp = NULL;
- union lmv_mds_md *lmm = NULL;
- u64 valid = 0;
- int max_stripe_count;
- int stripe_count;
- int mdt_index;
- int lum_size;
- int lmmsize;
- int rc;
- int i;
-
- ulmv = (struct lmv_user_md __user *)arg;
- if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
- return -EFAULT;
-
- max_stripe_count = lum.lum_stripe_count;
- /*
- * lum_magic will indicate which stripe the ioctl will like
- * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC
- * is for default LMV stripe
- */
- if (lum.lum_magic == LMV_MAGIC_V1)
- valid |= OBD_MD_MEA;
- else if (lum.lum_magic == LMV_USER_MAGIC)
- valid |= OBD_MD_DEFAULT_MEA;
- else
- return -EINVAL;
-
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request,
- valid);
- if (rc)
- goto finish_req;
-
- /* Get default LMV EA */
- if (lum.lum_magic == LMV_USER_MAGIC) {
- if (lmmsize > sizeof(*ulmv)) {
- rc = -EINVAL;
- goto finish_req;
- }
-
- if (copy_to_user(ulmv, lmm, lmmsize))
- rc = -EFAULT;
-
- goto finish_req;
- }
-
- stripe_count = lmv_mds_md_stripe_count_get(lmm);
- if (max_stripe_count < stripe_count) {
- lum.lum_stripe_count = stripe_count;
- if (copy_to_user(ulmv, &lum, sizeof(lum))) {
- rc = -EFAULT;
- goto finish_req;
- }
- rc = -E2BIG;
- goto finish_req;
- }
-
- lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1);
- tmp = kzalloc(lum_size, GFP_NOFS);
- if (!tmp) {
- rc = -ENOMEM;
- goto finish_req;
- }
-
- mdt_index = ll_get_mdt_idx(inode);
- if (mdt_index < 0) {
- rc = -ENOMEM;
- goto out_tmp;
- }
- tmp->lum_magic = LMV_MAGIC_V1;
- tmp->lum_stripe_count = 0;
- tmp->lum_stripe_offset = mdt_index;
- for (i = 0; i < stripe_count; i++) {
- struct lu_fid fid;
-
- fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]);
- mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid);
- if (mdt_index < 0) {
- rc = mdt_index;
- goto out_tmp;
- }
- tmp->lum_objects[i].lum_mds = mdt_index;
- tmp->lum_objects[i].lum_fid = fid;
- tmp->lum_stripe_count++;
- }
-
- if (copy_to_user(ulmv, tmp, lum_size)) {
- rc = -EFAULT;
- goto out_tmp;
- }
-out_tmp:
- kfree(tmp);
-finish_req:
- ptlrpc_req_finished(request);
- return rc;
- }
-
- case LL_IOC_LOV_SWAP_LAYOUTS:
- return -EPERM;
- case IOC_OBD_STATFS:
- return ll_obd_statfs(inode, (void __user *)arg);
- case LL_IOC_LOV_GETSTRIPE:
- case LL_IOC_MDC_GETINFO:
- case IOC_MDC_GETFILEINFO:
- case IOC_MDC_GETFILESTRIPE: {
- struct ptlrpc_request *request = NULL;
- struct lov_user_md __user *lump;
- struct lov_mds_md *lmm = NULL;
- struct mdt_body *body;
- char *filename = NULL;
- int lmmsize;
-
- if (cmd == IOC_MDC_GETFILEINFO ||
- cmd == IOC_MDC_GETFILESTRIPE) {
- filename = ll_getname((const char __user *)arg);
- if (IS_ERR(filename))
- return PTR_ERR(filename);
-
- rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
- &lmmsize, &request);
- } else {
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
- &request, 0);
- }
-
- if (request) {
- body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
- LASSERT(body);
- } else {
- goto out_req;
- }
-
- if (rc < 0) {
- if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
- cmd == LL_IOC_MDC_GETINFO)) {
- rc = 0;
- goto skip_lmm;
- }
-
- goto out_req;
- }
-
- if (cmd == IOC_MDC_GETFILESTRIPE ||
- cmd == LL_IOC_LOV_GETSTRIPE) {
- lump = (struct lov_user_md __user *)arg;
- } else {
- struct lov_user_mds_data __user *lmdp;
-
- lmdp = (struct lov_user_mds_data __user *)arg;
- lump = &lmdp->lmd_lmm;
- }
- if (copy_to_user(lump, lmm, lmmsize)) {
- if (copy_to_user(lump, lmm, sizeof(*lump))) {
- rc = -EFAULT;
- goto out_req;
- }
- rc = -EOVERFLOW;
- }
-skip_lmm:
- if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
- struct lov_user_mds_data __user *lmdp;
- lstat_t st = { 0 };
-
- st.st_dev = inode->i_sb->s_dev;
- st.st_mode = body->mbo_mode;
- st.st_nlink = body->mbo_nlink;
- st.st_uid = body->mbo_uid;
- st.st_gid = body->mbo_gid;
- st.st_rdev = body->mbo_rdev;
- st.st_size = body->mbo_size;
- st.st_blksize = PAGE_SIZE;
- st.st_blocks = body->mbo_blocks;
- st.st_atime = body->mbo_atime;
- st.st_mtime = body->mbo_mtime;
- st.st_ctime = body->mbo_ctime;
- st.st_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags &
- LL_SBI_32BIT_API);
-
- lmdp = (struct lov_user_mds_data __user *)arg;
- if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) {
- rc = -EFAULT;
- goto out_req;
- }
- }
-
-out_req:
- ptlrpc_req_finished(request);
- if (filename)
- ll_putname(filename);
- return rc;
- }
- case OBD_IOC_QUOTACTL: {
- struct if_quotactl *qctl;
-
- qctl = kzalloc(sizeof(*qctl), GFP_NOFS);
- if (!qctl)
- return -ENOMEM;
-
- if (copy_from_user(qctl, (void __user *)arg, sizeof(*qctl))) {
- rc = -EFAULT;
- goto out_quotactl;
- }
-
- rc = quotactl_ioctl(sbi, qctl);
-
- if (rc == 0 && copy_to_user((void __user *)arg, qctl,
- sizeof(*qctl)))
- rc = -EFAULT;
-
-out_quotactl:
- kfree(qctl);
- return rc;
- }
- case OBD_IOC_GETDTNAME:
- case OBD_IOC_GETMDNAME:
- return ll_get_obd_name(inode, cmd, arg);
- case LL_IOC_FLUSHCTX:
- return ll_flush_ctx(inode);
- case LL_IOC_GETOBDCOUNT: {
- int count, vallen;
- struct obd_export *exp;
-
- if (copy_from_user(&count, (int __user *)arg, sizeof(int)))
- return -EFAULT;
-
- /* get ost count when count is zero, get mdt count otherwise */
- exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
- vallen = sizeof(count);
- rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
- KEY_TGT_COUNT, &vallen, &count);
- if (rc) {
- CERROR("get target count failed: %d\n", rc);
- return rc;
- }
-
- if (copy_to_user((int __user *)arg, &count, sizeof(int)))
- return -EFAULT;
-
- return 0;
- }
- case LL_IOC_PATH2FID:
- if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
- sizeof(struct lu_fid)))
- return -EFAULT;
- return 0;
- case LL_IOC_GET_CONNECT_FLAGS: {
- return obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL,
- (void __user *)arg);
- }
- case OBD_IOC_CHANGELOG_SEND:
- case OBD_IOC_CHANGELOG_CLEAR:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
- sizeof(struct ioc_changelog));
- return rc;
- case OBD_IOC_FID2PATH:
- return ll_fid2path(inode, (void __user *)arg);
- case LL_IOC_GETPARENT:
- return ll_getparent(file, (void __user *)arg);
- case LL_IOC_FID2MDTIDX: {
- struct obd_export *exp = ll_i2mdexp(inode);
- struct lu_fid fid;
- __u32 index;
-
- if (copy_from_user(&fid, (const struct lu_fid __user *)arg,
- sizeof(fid)))
- return -EFAULT;
-
- /* Call mdc_iocontrol */
- rc = obd_iocontrol(LL_IOC_FID2MDTIDX, exp, sizeof(fid), &fid,
- &index);
- if (rc)
- return rc;
-
- return index;
- }
- case LL_IOC_HSM_REQUEST: {
- struct hsm_user_request *hur;
- ssize_t totalsize;
-
- hur = memdup_user((void __user *)arg, sizeof(*hur));
- if (IS_ERR(hur))
- return PTR_ERR(hur);
-
- /* Compute the whole struct size */
- totalsize = hur_len(hur);
- kfree(hur);
- if (totalsize < 0)
- return -E2BIG;
-
- /* Final size will be more than double totalsize */
- if (totalsize >= MDS_MAXREQSIZE / 3)
- return -E2BIG;
-
- hur = kzalloc(totalsize, GFP_NOFS);
- if (!hur)
- return -ENOMEM;
-
- /* Copy the whole struct */
- if (copy_from_user(hur, (void __user *)arg, totalsize)) {
- kvfree(hur);
- return -EFAULT;
- }
-
- if (hur->hur_request.hr_action == HUA_RELEASE) {
- const struct lu_fid *fid;
- struct inode *f;
- int i;
-
- for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
- fid = &hur->hur_user_item[i].hui_fid;
- f = search_inode_for_lustre(inode->i_sb, fid);
- if (IS_ERR(f)) {
- rc = PTR_ERR(f);
- break;
- }
-
- rc = ll_hsm_release(f);
- iput(f);
- if (rc != 0)
- break;
- }
- } else {
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
- hur, NULL);
- }
-
- kvfree(hur);
-
- return rc;
- }
- case LL_IOC_HSM_PROGRESS: {
- struct hsm_progress_kernel hpk;
- struct hsm_progress hp;
-
- if (copy_from_user(&hp, (void __user *)arg, sizeof(hp)))
- return -EFAULT;
-
- hpk.hpk_fid = hp.hp_fid;
- hpk.hpk_cookie = hp.hp_cookie;
- hpk.hpk_extent = hp.hp_extent;
- hpk.hpk_flags = hp.hp_flags;
- hpk.hpk_errval = hp.hp_errval;
- hpk.hpk_data_version = 0;
-
- /* File may not exist in Lustre; all progress
- * reported to Lustre root
- */
- rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
- NULL);
- return rc;
- }
- case LL_IOC_HSM_CT_START:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
- sizeof(struct lustre_kernelcomm));
- return rc;
-
- case LL_IOC_HSM_COPY_START: {
- struct hsm_copy *copy;
- int rc;
-
- copy = memdup_user((char __user *)arg, sizeof(*copy));
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = ll_ioc_copy_start(inode->i_sb, copy);
- if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
- rc = -EFAULT;
-
- kfree(copy);
- return rc;
- }
- case LL_IOC_HSM_COPY_END: {
- struct hsm_copy *copy;
- int rc;
-
- copy = memdup_user((char __user *)arg, sizeof(*copy));
- if (IS_ERR(copy))
- return PTR_ERR(copy);
-
- rc = ll_ioc_copy_end(inode->i_sb, copy);
- if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
- rc = -EFAULT;
-
- kfree(copy);
- return rc;
- }
- case LL_IOC_MIGRATE: {
- char *buf = NULL;
- const char *filename;
- int namelen = 0;
- int len;
- int rc;
- int mdtidx;
-
- rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
- if (rc < 0)
- return rc;
-
- data = (struct obd_ioctl_data *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- !data->ioc_inllen1 || !data->ioc_inllen2) {
- rc = -EINVAL;
- goto migrate_free;
- }
-
- filename = data->ioc_inlbuf1;
- namelen = data->ioc_inllen1;
- if (namelen < 1 || namelen != strlen(filename) + 1) {
- rc = -EINVAL;
- goto migrate_free;
- }
-
- if (data->ioc_inllen2 != sizeof(mdtidx)) {
- rc = -EINVAL;
- goto migrate_free;
- }
- mdtidx = *(int *)data->ioc_inlbuf2;
-
- rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1);
-migrate_free:
- kvfree(buf);
-
- return rc;
- }
-
- default:
- return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
- (void __user *)arg);
- }
-}
-
-static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file->f_mapping->host;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int api32 = ll_need_32bit_api(sbi);
- loff_t ret = -EINVAL;
-
- switch (origin) {
- case SEEK_SET:
- break;
- case SEEK_CUR:
- offset += file->f_pos;
- break;
- case SEEK_END:
- if (offset > 0)
- goto out;
- if (api32)
- offset += LL_DIR_END_OFF_32BIT;
- else
- offset += LL_DIR_END_OFF;
- break;
- default:
- goto out;
- }
-
- if (offset >= 0 &&
- ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
- (!api32 && offset <= LL_DIR_END_OFF))) {
- if (offset != file->f_pos) {
- if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
- (!api32 && offset == LL_DIR_END_OFF))
- fd->lfd_pos = MDS_DIR_END_OFF;
- else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
- fd->lfd_pos = offset << 32;
- else
- fd->lfd_pos = offset;
- file->f_pos = offset;
- }
- ret = offset;
- }
- goto out;
-
-out:
- return ret;
-}
-
-static int ll_dir_open(struct inode *inode, struct file *file)
-{
- return ll_file_open(inode, file);
-}
-
-static int ll_dir_release(struct inode *inode, struct file *file)
-{
- return ll_file_release(inode, file);
-}
-
-const struct file_operations ll_dir_operations = {
- .llseek = ll_dir_seek,
- .open = ll_dir_open,
- .release = ll_dir_release,
- .read = generic_read_dir,
- .iterate_shared = ll_readdir,
- .unlocked_ioctl = ll_dir_ioctl,
- .fsync = ll_fsync,
-};
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
deleted file mode 100644
index ca5faea13b7e..000000000000
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ /dev/null
@@ -1,3600 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/file.c
- *
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <lustre_dlm.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-#include <linux/sched.h>
-#include <linux/mount.h>
-#include <uapi/linux/lustre/lustre_fiemap.h>
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_swab.h>
-
-#include <cl_object.h>
-#include "llite_internal.h"
-
-static int
-ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
-
-static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg, int *rcp);
-
-static struct ll_file_data *ll_file_data_get(void)
-{
- struct ll_file_data *fd;
-
- fd = kmem_cache_zalloc(ll_file_data_slab, GFP_NOFS);
- if (!fd)
- return NULL;
- fd->fd_write_failed = false;
- return fd;
-}
-
-static void ll_file_data_put(struct ll_file_data *fd)
-{
- if (fd)
- kmem_cache_free(ll_file_data_slab, fd);
-}
-
-/**
- * Packs all the attributes into @op_data for the CLOSE rpc.
- */
-static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
- struct obd_client_handle *och)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- ll_prep_md_op_data(op_data, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
-
- op_data->op_attr.ia_mode = inode->i_mode;
- op_data->op_attr.ia_atime = inode->i_atime;
- op_data->op_attr.ia_mtime = inode->i_mtime;
- op_data->op_attr.ia_ctime = inode->i_ctime;
- op_data->op_attr.ia_size = i_size_read(inode);
- op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_CTIME | ATTR_CTIME_SET;
- op_data->op_attr_blocks = inode->i_blocks;
- op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
- op_data->op_handle = och->och_fh;
-
- /*
- * For HSM: if inode data has been modified, pack it so that
- * MDT can set data dirty flag in the archive.
- */
- if (och->och_flags & FMODE_WRITE &&
- test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags))
- op_data->op_bias |= MDS_DATA_MODIFIED;
-}
-
-/**
- * Perform a close, possibly with a bias.
- * The meaning of "data" depends on the value of "bias".
- *
- * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
- * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
- * swap layouts with.
- */
-static int ll_close_inode_openhandle(struct inode *inode,
- struct obd_client_handle *och,
- enum mds_op_bias bias,
- void *data)
-{
- const struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_export *md_exp = ll_i2mdexp(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int rc;
-
- if (!class_exp2obd(md_exp)) {
- CERROR("%s: invalid MDC connection handle closing " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid));
- rc = 0;
- goto out;
- }
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- /*
- * We leak openhandle and request here on error, but not much to be
- * done in OOM case since app won't retry close on error either.
- */
- if (!op_data) {
- rc = -ENOMEM;
- goto out;
- }
-
- ll_prepare_close(inode, op_data, och);
- switch (bias) {
- case MDS_CLOSE_LAYOUT_SWAP:
- LASSERT(data);
- op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
- op_data->op_data_version = 0;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_fid2 = *ll_inode2fid(data);
- break;
-
- case MDS_HSM_RELEASE:
- LASSERT(data);
- op_data->op_bias |= MDS_HSM_RELEASE;
- op_data->op_data_version = *(__u64 *)data;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
- break;
-
- default:
- LASSERT(!data);
- break;
- }
-
- rc = md_close(md_exp, op_data, och->och_mod, &req);
- if (rc && rc != -EINTR) {
- CERROR("%s: inode " DFID " mdc close failed: rc = %d\n",
- md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
- }
-
- if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) &&
- !rc) {
- struct mdt_body *body;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
- rc = -EBUSY;
- }
-
- ll_finish_md_op_data(op_data);
-
-out:
- md_clear_open_replay_data(md_exp, och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
-
- ptlrpc_req_finished(req);
- return rc;
-}
-
-int ll_md_real_close(struct inode *inode, fmode_t fmode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle **och_p;
- struct obd_client_handle *och;
- __u64 *och_usecount;
- int rc = 0;
-
- if (fmode & FMODE_WRITE) {
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (fmode & FMODE_EXEC) {
- och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
- LASSERT(fmode & FMODE_READ);
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
-
- mutex_lock(&lli->lli_och_mutex);
- if (*och_usecount > 0) {
- /* There are still users of this handle, so skip
- * freeing it.
- */
- mutex_unlock(&lli->lli_och_mutex);
- return 0;
- }
-
- och = *och_p;
- *och_p = NULL;
- mutex_unlock(&lli->lli_och_mutex);
-
- if (och) {
- /* There might be a race and this handle may already
- * be closed.
- */
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
- }
-
- return rc;
-}
-
-static int ll_md_close(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- int lockmode;
- __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
- struct lustre_handle lockh;
- union ldlm_policy_data policy = {
- .l_inodebits = { MDS_INODELOCK_OPEN }
- };
- int rc = 0;
-
- /* clear group lock, if present */
- if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
- ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
-
- if (fd->fd_lease_och) {
- bool lease_broken;
-
- /* Usually the lease is not released when the
- * application crashed, we need to release here.
- */
- rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
- CDEBUG(rc ? D_ERROR : D_INODE,
- "Clean up lease " DFID " %d/%d\n",
- PFID(&lli->lli_fid), rc, lease_broken);
-
- fd->fd_lease_och = NULL;
- }
-
- if (fd->fd_och) {
- rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
- fd->fd_och = NULL;
- goto out;
- }
-
- /* Let's see if we have good enough OPEN lock on the file and if
- * we can skip talking to MDS
- */
-
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_omode & FMODE_WRITE) {
- lockmode = LCK_CW;
- LASSERT(lli->lli_open_fd_write_count);
- lli->lli_open_fd_write_count--;
- } else if (fd->fd_omode & FMODE_EXEC) {
- lockmode = LCK_PR;
- LASSERT(lli->lli_open_fd_exec_count);
- lli->lli_open_fd_exec_count--;
- } else {
- lockmode = LCK_CR;
- LASSERT(lli->lli_open_fd_read_count);
- lli->lli_open_fd_read_count--;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
- if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
- LDLM_IBITS, &policy, lockmode, &lockh))
- rc = ll_md_real_close(inode, fd->fd_omode);
-
-out:
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
-
- return rc;
-}
-
-/* While this returns an error code, fput() the caller does not, so we need
- * to make every effort to clean up all of our state here. Also, applications
- * rarely check close errors and even if an error is returned they will not
- * re-try the close call.
- */
-int ll_file_release(struct inode *inode, struct file *file)
-{
- struct ll_file_data *fd;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- if (!is_root_inode(inode))
- ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
- fd = LUSTRE_FPRIVATE(file);
- LASSERT(fd);
-
- /* The last ref on @file, maybe not be the owner pid of statahead,
- * because parent and child process can share the same file handle.
- */
- if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
- ll_deauthorize_statahead(inode, fd);
-
- if (is_root_inode(inode)) {
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
- return 0;
- }
-
- if (!S_ISDIR(inode->i_mode)) {
- if (lli->lli_clob)
- lov_read_and_clear_async_rc(lli->lli_clob);
- lli->lli_async_rc = 0;
- }
-
- rc = ll_md_close(inode, file);
-
- if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
- libcfs_debug_dumplog();
-
- return rc;
-}
-
-static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
- struct lookup_intent *itp)
-{
- struct inode *inode = d_inode(de);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct dentry *parent = de->d_parent;
- const char *name = NULL;
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- int len = 0, rc;
-
- LASSERT(parent);
- LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
-
- /*
- * if server supports open-by-fid, or file name is invalid, don't pack
- * name in open request
- */
- if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
- lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
- name = de->d_name.name;
- len = de->d_name.len;
- }
-
- op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
- O_RDWR, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
- op_data->op_data = lmm;
- op_data->op_data_size = lmmsize;
-
- rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
- &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- if (rc == -ESTALE) {
- /* reason for keep own exit path - don`t flood log
- * with messages with -ESTALE errors.
- */
- if (!it_disposition(itp, DISP_OPEN_OPEN) ||
- it_open_error(DISP_OPEN_OPEN, itp))
- goto out;
- ll_release_openhandle(inode, itp);
- goto out;
- }
-
- if (it_disposition(itp, DISP_LOOKUP_NEG)) {
- rc = -ENOENT;
- goto out;
- }
-
- if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
- rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
- CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
- goto out;
- }
-
- rc = ll_prep_inode(&inode, req, NULL, itp);
- if (!rc && itp->it_lock_mode)
- ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL);
-
-out:
- ptlrpc_req_finished(req);
- ll_intent_drop_lock(itp);
-
- /*
- * We did open by fid, but by the time we got to the server,
- * the object disappeared. If this is a create, we cannot really
- * tell the userspace that the file it was trying to create
- * does not exist. Instead let's return -ESTALE, and the VFS will
- * retry the create with LOOKUP_REVAL that we are going to catch
- * in ll_revalidate_dentry() and use lookup then.
- */
- if (rc == -ENOENT && itp->it_op & IT_CREAT)
- rc = -ESTALE;
-
- return rc;
-}
-
-static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
- struct obd_client_handle *och)
-{
- struct mdt_body *body;
-
- body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
- och->och_fh = body->mbo_handle;
- och->och_fid = body->mbo_fid1;
- och->och_lease_handle.cookie = it->it_lock_handle;
- och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
- och->och_flags = it->it_flags;
-
- return md_set_open_replay_data(md_exp, och, it);
-}
-
-static int ll_local_open(struct file *file, struct lookup_intent *it,
- struct ll_file_data *fd, struct obd_client_handle *och)
-{
- struct inode *inode = file_inode(file);
-
- LASSERT(!LUSTRE_FPRIVATE(file));
-
- LASSERT(fd);
-
- if (och) {
- int rc;
-
- rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
- if (rc != 0)
- return rc;
- }
-
- LUSTRE_FPRIVATE(file) = fd;
- ll_readahead_init(inode, &fd->fd_ras);
- fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
-
- /* ll_cl_context initialize */
- rwlock_init(&fd->fd_lock);
- INIT_LIST_HEAD(&fd->fd_lccs);
-
- return 0;
-}
-
-/* Open a file, and (for the very first open) create objects on the OSTs at
- * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
- * creation or open until ll_lov_setstripe() ioctl is called.
- *
- * If we already have the stripe MD locally then we don't request it in
- * md_open(), by passing a lmm_size = 0.
- *
- * It is up to the application to ensure no other processes open this file
- * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
- * used. We might be able to avoid races of that sort by getting lli_open_sem
- * before returning in the O_LOV_DELAY_CREATE case and dropping it here
- * or in ll_file_release(), but I'm not sure that is desirable/necessary.
- */
-int ll_file_open(struct inode *inode, struct file *file)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lookup_intent *it, oit = { .it_op = IT_OPEN,
- .it_flags = file->f_flags };
- struct obd_client_handle **och_p = NULL;
- __u64 *och_usecount = NULL;
- struct ll_file_data *fd;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), flags %o\n",
- PFID(ll_inode2fid(inode)), inode, file->f_flags);
-
- it = file->private_data; /* XXX: compat macro */
- file->private_data = NULL; /* prevent ll_local_open assertion */
-
- fd = ll_file_data_get();
- if (!fd) {
- rc = -ENOMEM;
- goto out_openerr;
- }
-
- fd->fd_file = file;
- if (S_ISDIR(inode->i_mode))
- ll_authorize_statahead(inode, fd);
-
- if (is_root_inode(inode)) {
- LUSTRE_FPRIVATE(file) = fd;
- return 0;
- }
-
- if (!it || !it->it_disposition) {
- /* Convert f_flags into access mode. We cannot use file->f_mode,
- * because everything but O_ACCMODE mask was stripped from
- * there
- */
- if ((oit.it_flags + 1) & O_ACCMODE)
- oit.it_flags++;
- if (file->f_flags & O_TRUNC)
- oit.it_flags |= FMODE_WRITE;
-
- /* kernel only call f_op->open in dentry_open. filp_open calls
- * dentry_open after call to open_namei that checks permissions.
- * Only nfsd_open call dentry_open directly without checking
- * permissions and because of that this code below is safe.
- */
- if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
- oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
-
- /* We do not want O_EXCL here, presumably we opened the file
- * already? XXX - NFS implications?
- */
- oit.it_flags &= ~O_EXCL;
-
- /* bug20584, if "it_flags" contains O_CREAT, the file will be
- * created if necessary, then "IT_CREAT" should be set to keep
- * consistent with it
- */
- if (oit.it_flags & O_CREAT)
- oit.it_op |= IT_CREAT;
-
- it = &oit;
- }
-
-restart:
- /* Let's see if we have file open on MDS already. */
- if (it->it_flags & FMODE_WRITE) {
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else if (it->it_flags & FMODE_EXEC) {
- och_p = &lli->lli_mds_exec_och;
- och_usecount = &lli->lli_open_fd_exec_count;
- } else {
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
-
- mutex_lock(&lli->lli_och_mutex);
- if (*och_p) { /* Open handle is present */
- if (it_disposition(it, DISP_OPEN_OPEN)) {
- /* Well, there's extra open request that we do not need,
- * let's close it somehow. This will decref request.
- */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc) {
- mutex_unlock(&lli->lli_och_mutex);
- goto out_openerr;
- }
-
- ll_release_openhandle(inode, it);
- }
- (*och_usecount)++;
-
- rc = ll_local_open(file, it, fd, NULL);
- if (rc) {
- (*och_usecount)--;
- mutex_unlock(&lli->lli_och_mutex);
- goto out_openerr;
- }
- } else {
- LASSERT(*och_usecount == 0);
- if (!it->it_disposition) {
- /* We cannot just request lock handle now, new ELC code
- * means that one of other OPEN locks for this file
- * could be cancelled, and since blocking ast handler
- * would attempt to grab och_mutex as well, that would
- * result in a deadlock
- */
- mutex_unlock(&lli->lli_och_mutex);
- /*
- * Normally called under two situations:
- * 1. NFS export.
- * 2. revalidate with IT_OPEN (revalidate doesn't
- * execute this intent any more).
- *
- * Always fetch MDS_OPEN_LOCK if this is not setstripe.
- *
- * Always specify MDS_OPEN_BY_FID because we don't want
- * to get file with different fid.
- */
- it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
- rc = ll_intent_file_open(file->f_path.dentry,
- NULL, 0, it);
- if (rc)
- goto out_openerr;
-
- goto restart;
- }
- *och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS);
- if (!*och_p) {
- rc = -ENOMEM;
- goto out_och_free;
- }
-
- (*och_usecount)++;
-
- /* md_intent_lock() didn't get a request ref if there was an
- * open error, so don't do cleanup on the request here
- * (bug 3430)
- */
- /* XXX (green): Should not we bail out on any error here, not
- * just open error?
- */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc)
- goto out_och_free;
-
- LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
- "inode %p: disposition %x, status %d\n", inode,
- it_disposition(it, ~0), it->it_status);
-
- rc = ll_local_open(file, it, fd, *och_p);
- if (rc)
- goto out_och_free;
- }
- mutex_unlock(&lli->lli_och_mutex);
- fd = NULL;
-
- /* Must do this outside lli_och_mutex lock to prevent deadlock where
- * different kind of OPEN lock for this same inode gets cancelled
- * by ldlm_cancel_lru
- */
- if (!S_ISREG(inode->i_mode))
- goto out_och_free;
-
- cl_lov_delay_create_clear(&file->f_flags);
- goto out_och_free;
-
-out_och_free:
- if (rc) {
- if (och_p && *och_p) {
- kfree(*och_p);
- *och_p = NULL;
- (*och_usecount)--;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
-out_openerr:
- if (lli->lli_opendir_key == fd)
- ll_deauthorize_statahead(inode, fd);
- if (fd)
- ll_file_data_put(fd);
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
- }
-
- if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->it_request);
- it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- }
-
- return rc;
-}
-
-static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- int rc;
- struct lustre_handle lockh;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (rc < 0) {
- CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
- return rc;
- }
- break;
- case LDLM_CB_CANCELING:
- /* do nothing */
- break;
- }
- return 0;
-}
-
-/**
- * Acquire a lease and open the file.
- */
-static struct obd_client_handle *
-ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
- __u64 open_flags)
-{
- struct lookup_intent it = { .it_op = IT_OPEN };
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct ptlrpc_request *req = NULL;
- struct lustre_handle old_handle = { 0 };
- struct obd_client_handle *och = NULL;
- int rc;
- int rc2;
-
- if (fmode != FMODE_WRITE && fmode != FMODE_READ)
- return ERR_PTR(-EINVAL);
-
- if (file) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct obd_client_handle **och_p;
- __u64 *och_usecount;
-
- if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
- return ERR_PTR(-EPERM);
-
- /* Get the openhandle of the file */
- rc = -EBUSY;
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- mutex_unlock(&lli->lli_och_mutex);
- return ERR_PTR(rc);
- }
-
- if (!fd->fd_och) {
- if (file->f_mode & FMODE_WRITE) {
- LASSERT(lli->lli_mds_write_och);
- och_p = &lli->lli_mds_write_och;
- och_usecount = &lli->lli_open_fd_write_count;
- } else {
- LASSERT(lli->lli_mds_read_och);
- och_p = &lli->lli_mds_read_och;
- och_usecount = &lli->lli_open_fd_read_count;
- }
- if (*och_usecount == 1) {
- fd->fd_och = *och_p;
- *och_p = NULL;
- *och_usecount = 0;
- rc = 0;
- }
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (rc < 0) /* more than 1 opener */
- return ERR_PTR(rc);
-
- LASSERT(fd->fd_och);
- old_handle = fd->fd_och->och_fh;
- }
-
- och = kzalloc(sizeof(*och), GFP_NOFS);
- if (!och)
- return ERR_PTR(-ENOMEM);
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- /* To tell the MDT this openhandle is from the same owner */
- op_data->op_handle = old_handle;
-
- it.it_flags = fmode | open_flags;
- it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
- rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
- &ll_md_blocking_lease_ast,
- /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
- * it can be cancelled which may mislead applications that the lease is
- * broken;
- * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
- * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
- * doesn't deal with openhandle, so normal openhandle will be leaked.
- */
- LDLM_FL_NO_LRU | LDLM_FL_EXCL);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc < 0)
- goto out_release_it;
-
- if (it_disposition(&it, DISP_LOOKUP_NEG)) {
- rc = -ENOENT;
- goto out_release_it;
- }
-
- rc = it_open_error(DISP_OPEN_OPEN, &it);
- if (rc)
- goto out_release_it;
-
- LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
- ll_och_fill(sbi->ll_md_exp, &it, och);
-
- if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ {
- rc = -EOPNOTSUPP;
- goto out_close;
- }
-
- /* already get lease, handle lease lock */
- ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- if (it.it_lock_mode == 0 ||
- it.it_lock_bits != MDS_INODELOCK_OPEN) {
- /* open lock must return for lease */
- CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
- PFID(ll_inode2fid(inode)), it.it_lock_mode,
- it.it_lock_bits);
- rc = -EPROTO;
- goto out_close;
- }
-
- ll_intent_release(&it);
- return och;
-
-out_close:
- /* Cancel open lock */
- if (it.it_lock_mode != 0) {
- ldlm_lock_decref_and_cancel(&och->och_lease_handle,
- it.it_lock_mode);
- it.it_lock_mode = 0;
- och->och_lease_handle.cookie = 0ULL;
- }
- rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
- if (rc2 < 0)
- CERROR("%s: error closing file " DFID ": %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&ll_i2info(inode)->lli_fid), rc2);
- och = NULL; /* och has been freed in ll_close_inode_openhandle() */
-out_release_it:
- ll_intent_release(&it);
-out:
- kfree(och);
- return ERR_PTR(rc);
-}
-
-/**
- * Check whether a layout swap can be done between two inodes.
- *
- * \param[in] inode1 First inode to check
- * \param[in] inode2 Second inode to check
- *
- * \retval 0 on success, layout swap can be performed between both inodes
- * \retval negative error code if requirements are not met
- */
-static int ll_check_swap_layouts_validity(struct inode *inode1,
- struct inode *inode2)
-{
- if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
- return -EINVAL;
-
- if (inode_permission(inode1, MAY_WRITE) ||
- inode_permission(inode2, MAY_WRITE))
- return -EPERM;
-
- if (inode1->i_sb != inode2->i_sb)
- return -EXDEV;
-
- return 0;
-}
-
-static int ll_swap_layouts_close(struct obd_client_handle *och,
- struct inode *inode, struct inode *inode2)
-{
- const struct lu_fid *fid1 = ll_inode2fid(inode);
- const struct lu_fid *fid2;
- int rc;
-
- CDEBUG(D_INODE, "%s: biased close of file " DFID "\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
-
- rc = ll_check_swap_layouts_validity(inode, inode2);
- if (rc < 0)
- goto out_free_och;
-
- /* We now know that inode2 is a lustre inode */
- fid2 = ll_inode2fid(inode2);
-
- rc = lu_fid_cmp(fid1, fid2);
- if (!rc) {
- rc = -EINVAL;
- goto out_free_och;
- }
-
- /*
- * Close the file and swap layouts between inode & inode2.
- * NB: lease lock handle is released in mdc_close_layout_swap_pack()
- * because we still need it to pack l_remote_handle to MDT.
- */
- rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
- inode2);
-
- och = NULL; /* freed in ll_close_inode_openhandle() */
-
-out_free_och:
- kfree(och);
- return rc;
-}
-
-/**
- * Release lease and close the file.
- * It will check if the lease has ever broken.
- */
-static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken)
-{
- struct ldlm_lock *lock;
- bool cancelled = true;
-
- lock = ldlm_handle2lock(&och->och_lease_handle);
- if (lock) {
- lock_res_and_lock(lock);
- cancelled = ldlm_is_cancel(lock);
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
-
- CDEBUG(D_INODE, "lease for " DFID " broken? %d\n",
- PFID(&ll_i2info(inode)->lli_fid), cancelled);
-
- if (!cancelled)
- ldlm_cli_cancel(&och->och_lease_handle, 0);
- if (lease_broken)
- *lease_broken = cancelled;
-
- return ll_close_inode_openhandle(inode, och, 0, NULL);
-}
-
-int ll_merge_attr(const struct lu_env *env, struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct cl_attr *attr = vvp_env_thread_attr(env);
- s64 atime;
- s64 mtime;
- s64 ctime;
- int rc = 0;
-
- ll_inode_size_lock(inode);
-
- /* merge timestamps the most recently obtained from mds with
- * timestamps obtained from osts
- */
- LTIME_S(inode->i_atime) = lli->lli_atime;
- LTIME_S(inode->i_mtime) = lli->lli_mtime;
- LTIME_S(inode->i_ctime) = lli->lli_ctime;
-
- mtime = LTIME_S(inode->i_mtime);
- atime = LTIME_S(inode->i_atime);
- ctime = LTIME_S(inode->i_ctime);
-
- cl_object_attr_lock(obj);
- rc = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
-
- if (rc != 0)
- goto out_size_unlock;
-
- if (atime < attr->cat_atime)
- atime = attr->cat_atime;
-
- if (ctime < attr->cat_ctime)
- ctime = attr->cat_ctime;
-
- if (mtime < attr->cat_mtime)
- mtime = attr->cat_mtime;
-
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(&lli->lli_fid), attr->cat_size);
-
- i_size_write(inode, attr->cat_size);
-
- inode->i_blocks = attr->cat_blocks;
-
- LTIME_S(inode->i_mtime) = mtime;
- LTIME_S(inode->i_atime) = atime;
- LTIME_S(inode->i_ctime) = ctime;
-
-out_size_unlock:
- ll_inode_size_unlock(inode);
-
- return rc;
-}
-
-static bool file_is_noatime(const struct file *file)
-{
- const struct vfsmount *mnt = file->f_path.mnt;
- const struct inode *inode = file_inode(file);
-
- /* Adapted from file_accessed() and touch_atime().*/
- if (file->f_flags & O_NOATIME)
- return true;
-
- if (inode->i_flags & S_NOATIME)
- return true;
-
- if (IS_NOATIME(inode))
- return true;
-
- if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
- return true;
-
- if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
- return true;
-
- if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode))
- return true;
-
- return false;
-}
-
-static void ll_io_init(struct cl_io *io, const struct file *file, int write)
-{
- struct inode *inode = file_inode(file);
-
- io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
- if (write) {
- io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
- io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
- file->f_flags & O_DIRECT ||
- IS_SYNC(inode);
- }
- io->ci_obj = ll_i2info(inode)->lli_clob;
- io->ci_lockreq = CILR_MAYBE;
- if (ll_file_nolock(file)) {
- io->ci_lockreq = CILR_NEVER;
- io->ci_no_srvlock = 1;
- } else if (file->f_flags & O_APPEND) {
- io->ci_lockreq = CILR_MANDATORY;
- }
-
- io->ci_noatime = file_is_noatime(file);
-}
-
-static ssize_t
-ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
- struct file *file, enum cl_io_type iot,
- loff_t *ppos, size_t count)
-{
- struct ll_inode_info *lli = ll_i2info(file_inode(file));
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct vvp_io *vio = vvp_env_io(env);
- struct range_lock range;
- struct cl_io *io;
- ssize_t result = 0;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
- file, iot, *ppos, count);
-
-restart:
- io = vvp_env_thread_io(env);
- ll_io_init(io, file, iot == CIT_WRITE);
-
- if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
- struct vvp_io *vio = vvp_env_io(env);
- bool range_locked = false;
-
- if (file->f_flags & O_APPEND)
- range_lock_init(&range, 0, LUSTRE_EOF);
- else
- range_lock_init(&range, *ppos, *ppos + count - 1);
-
- vio->vui_fd = LUSTRE_FPRIVATE(file);
- vio->vui_iter = args->u.normal.via_iter;
- vio->vui_iocb = args->u.normal.via_iocb;
- /*
- * Direct IO reads must also take range lock,
- * or multiple reads will try to work on the same pages
- * See LU-6227 for details.
- */
- if (((iot == CIT_WRITE) ||
- (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
- !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
- range.rl_node.in_extent.start,
- range.rl_node.in_extent.end);
- rc = range_lock(&lli->lli_write_tree, &range);
- if (rc < 0)
- goto out;
-
- range_locked = true;
- }
- ll_cl_add(file, env, io);
- rc = cl_io_loop(env, io);
- ll_cl_remove(file, env);
- if (range_locked) {
- CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
- range.rl_node.in_extent.start,
- range.rl_node.in_extent.end);
- range_unlock(&lli->lli_write_tree, &range);
- }
- } else {
- /* cl_io_rw_init() handled IO */
- rc = io->ci_result;
- }
-
- if (io->ci_nob > 0) {
- result = io->ci_nob;
- count -= io->ci_nob;
- *ppos = io->u.ci_wr.wr.crw_pos;
-
- /* prepare IO restart */
- if (count > 0)
- args->u.normal.via_iter = vio->vui_iter;
- }
-out:
- cl_io_fini(env, io);
-
- if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- "%s: restart %s from %lld, count:%zu, result: %zd\n",
- file_dentry(file)->d_name.name,
- iot == CIT_READ ? "read" : "write",
- *ppos, count, result);
- goto restart;
- }
-
- if (iot == CIT_READ) {
- if (result >= 0)
- ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
- LPROC_LL_READ_BYTES, result);
- } else if (iot == CIT_WRITE) {
- if (result >= 0) {
- ll_stats_ops_tally(ll_i2sbi(file_inode(file)),
- LPROC_LL_WRITE_BYTES, result);
- fd->fd_write_failed = false;
- } else if (!result && !rc) {
- rc = io->ci_result;
- if (rc < 0)
- fd->fd_write_failed = true;
- else
- fd->fd_write_failed = false;
- } else if (rc != -ERESTARTSYS) {
- fd->fd_write_failed = true;
- }
- }
- CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
-
- return result > 0 ? result : rc;
-}
-
-static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- struct lu_env *env;
- struct vvp_io_args *args;
- ssize_t result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- args = ll_env_args(env);
- args->u.normal.via_iter = to;
- args->u.normal.via_iocb = iocb;
-
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
- &iocb->ki_pos, iov_iter_count(to));
- cl_env_put(env, &refcheck);
- return result;
-}
-
-/*
- * Write to a file (through the page cache).
- */
-static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct lu_env *env;
- struct vvp_io_args *args;
- ssize_t result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- args = ll_env_args(env);
- args->u.normal.via_iter = from;
- args->u.normal.via_iocb = iocb;
-
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
- &iocb->ki_pos, iov_iter_count(from));
- cl_env_put(env, &refcheck);
- return result;
-}
-
-int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- __u64 flags, struct lov_user_md *lum,
- int lum_size)
-{
- struct lookup_intent oit = {
- .it_op = IT_OPEN,
- .it_flags = flags | MDS_OPEN_BY_FID,
- };
- int rc = 0;
-
- ll_inode_size_lock(inode);
- rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
- if (rc < 0)
- goto out_unlock;
-
- ll_release_openhandle(inode, &oit);
-
-out_unlock:
- ll_inode_size_unlock(inode);
- ll_intent_release(&oit);
- return rc;
-}
-
-int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
- struct lov_mds_md **lmmp, int *lmm_size,
- struct ptlrpc_request **request)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdt_body *body;
- struct lov_mds_md *lmm = NULL;
- struct ptlrpc_request *req = NULL;
- struct md_op_data *op_data;
- int rc, lmmsize;
-
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
- strlen(filename), lmmsize,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
- filename, rc);
- goto out;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- lmmsize = body->mbo_eadatasize;
-
- if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
- lmmsize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
-
- if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
- (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
- rc = -EPROTO;
- goto out;
- }
-
- /*
- * This is coming from the MDS, so is probably in
- * little endian. We convert it to host endian before
- * passing it to userspace.
- */
- if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) {
- int stripe_count;
-
- stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
- if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
- stripe_count = 0;
-
- /* if function called for directory - we should
- * avoid swab not existent lsm objects
- */
- if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
- lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
- if (S_ISREG(body->mbo_mode))
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v1 *)lmm)->lmm_objects,
- stripe_count);
- } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
- lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
- if (S_ISREG(body->mbo_mode))
- lustre_swab_lov_user_md_objects(
- ((struct lov_user_md_v3 *)lmm)->lmm_objects,
- stripe_count);
- }
- }
-
-out:
- *lmmp = lmm;
- *lmm_size = lmmsize;
- *request = req;
- return rc;
-}
-
-static int ll_lov_setea(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
- struct lov_user_md *lump;
- int lum_size = sizeof(struct lov_user_md) +
- sizeof(struct lov_user_ost_data);
- int rc;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- lump = kzalloc(lum_size, GFP_NOFS);
- if (!lump)
- return -ENOMEM;
-
- if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
- kvfree(lump);
- return -EFAULT;
- }
-
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump,
- lum_size);
- cl_lov_delay_create_clear(&file->f_flags);
-
- kvfree(lump);
- return rc;
-}
-
-static int ll_file_getstripe(struct inode *inode,
- struct lov_user_md __user *lum)
-{
- struct lu_env *env;
- u16 refcheck;
- int rc;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-static int ll_lov_setstripe(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
- struct lov_user_md *klum;
- int lum_size, rc;
- __u64 flags = FMODE_WRITE;
-
- rc = ll_copy_user_md(lum, &klum);
- if (rc < 0)
- return rc;
-
- lum_size = rc;
- rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum,
- lum_size);
- cl_lov_delay_create_clear(&file->f_flags);
- if (rc == 0) {
- __u32 gen;
-
- put_user(0, &lum->lmm_stripe_count);
-
- ll_layout_refresh(inode, &gen);
- rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
- }
-
- kfree(klum);
- return rc;
-}
-
-static int
-ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_grouplock grouplock;
- int rc;
-
- if (arg == 0) {
- CWARN("group id for group lock must not be 0\n");
- return -EINVAL;
- }
-
- if (ll_file_nolock(file))
- return -EOPNOTSUPP;
-
- spin_lock(&lli->lli_lock);
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- CWARN("group lock already existed with gid %lu\n",
- fd->fd_grouplock.lg_gid);
- spin_unlock(&lli->lli_lock);
- return -EINVAL;
- }
- LASSERT(!fd->fd_grouplock.lg_lock);
- spin_unlock(&lli->lli_lock);
-
- rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
- arg, (file->f_flags & O_NONBLOCK), &grouplock);
- if (rc)
- return rc;
-
- spin_lock(&lli->lli_lock);
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- spin_unlock(&lli->lli_lock);
- CERROR("another thread just won the race\n");
- cl_put_grouplock(&grouplock);
- return -EINVAL;
- }
-
- fd->fd_flags |= LL_FILE_GROUP_LOCKED;
- fd->fd_grouplock = grouplock;
- spin_unlock(&lli->lli_lock);
-
- CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
- return 0;
-}
-
-static int ll_put_grouplock(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_grouplock grouplock;
-
- spin_lock(&lli->lli_lock);
- if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- spin_unlock(&lli->lli_lock);
- CWARN("no group lock held\n");
- return -EINVAL;
- }
- LASSERT(fd->fd_grouplock.lg_lock);
-
- if (fd->fd_grouplock.lg_gid != arg) {
- CWARN("group lock %lu doesn't match current id %lu\n",
- arg, fd->fd_grouplock.lg_gid);
- spin_unlock(&lli->lli_lock);
- return -EINVAL;
- }
-
- grouplock = fd->fd_grouplock;
- memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
- fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
- spin_unlock(&lli->lli_lock);
-
- cl_put_grouplock(&grouplock);
- CDEBUG(D_INFO, "group lock %lu released\n", arg);
- return 0;
-}
-
-/**
- * Close inode open handle
- *
- * \param inode [in] inode in question
- * \param it [in,out] intent which contains open info and result
- *
- * \retval 0 success
- * \retval <0 failure
- */
-int ll_release_openhandle(struct inode *inode, struct lookup_intent *it)
-{
- struct obd_client_handle *och;
- int rc;
-
- LASSERT(inode);
-
- /* Root ? Do nothing. */
- if (is_root_inode(inode))
- return 0;
-
- /* No open handle to close? Move away */
- if (!it_disposition(it, DISP_OPEN_OPEN))
- return 0;
-
- LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
-
- och = kzalloc(sizeof(*och), GFP_NOFS);
- if (!och) {
- rc = -ENOMEM;
- goto out;
- }
-
- ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
-
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
-out:
- /* this one is in place of ll_file_open */
- if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
- ptlrpc_req_finished(it->it_request);
- it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- }
- return rc;
-}
-
-/**
- * Get size for inode for which FIEMAP mapping is requested.
- * Make the FIEMAP get_info call and returns the result.
- *
- * \param fiemap kernel buffer to hold extens
- * \param num_bytes kernel buffer size
- */
-static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
- size_t num_bytes)
-{
- struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
- struct lu_env *env;
- u16 refcheck;
- int rc = 0;
-
- /* Checks for fiemap flags */
- if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
- fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
- return -EBADR;
- }
-
- /* Check for FIEMAP_FLAG_SYNC */
- if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
- rc = filemap_fdatawrite(inode->i_mapping);
- if (rc)
- return rc;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- if (i_size_read(inode) == 0) {
- rc = ll_glimpse_size(inode);
- if (rc)
- goto out;
- }
-
- fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
- obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
- obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
-
- /* If filesize is 0, then there would be no objects for mapping */
- if (fmkey.lfik_oa.o_size == 0) {
- fiemap->fm_mapped_extents = 0;
- rc = 0;
- goto out;
- }
-
- memcpy(&fmkey.lfik_fiemap, fiemap, sizeof(*fiemap));
-
- rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
- &fmkey, fiemap, &num_bytes);
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-int ll_fid2path(struct inode *inode, void __user *arg)
-{
- struct obd_export *exp = ll_i2mdexp(inode);
- const struct getinfo_fid2path __user *gfin = arg;
- struct getinfo_fid2path *gfout;
- u32 pathlen;
- size_t outsize;
- int rc;
-
- if (!capable(CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
- return -EPERM;
-
- /* Only need to get the buflen */
- if (get_user(pathlen, &gfin->gf_pathlen))
- return -EFAULT;
-
- if (pathlen > PATH_MAX)
- return -EINVAL;
-
- outsize = sizeof(*gfout) + pathlen;
-
- gfout = kzalloc(outsize, GFP_NOFS);
- if (!gfout)
- return -ENOMEM;
-
- if (copy_from_user(gfout, arg, sizeof(*gfout))) {
- rc = -EFAULT;
- goto gf_free;
- }
-
- /* Call mdc_iocontrol */
- rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
- if (rc != 0)
- goto gf_free;
-
- if (copy_to_user(arg, gfout, outsize))
- rc = -EFAULT;
-
-gf_free:
- kfree(gfout);
- return rc;
-}
-
-/*
- * Read the data_version for inode.
- *
- * This value is computed using stripe object version on OST.
- * Version is computed using server side locking.
- *
- * @param flags if do sync on the OST side;
- * 0: no sync
- * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
- * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
- */
-int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
-{
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- struct lu_env *env;
- struct cl_io *io;
- u16 refcheck;
- int result;
-
- /* If no file object initialized, we consider its version is 0. */
- if (!obj) {
- *data_version = 0;
- return 0;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
- io->u.ci_data_version.dv_data_version = 0;
- io->u.ci_data_version.dv_flags = flags;
-
-restart:
- if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj))
- result = cl_io_loop(env, io);
- else
- result = io->ci_result;
-
- *data_version = io->u.ci_data_version.dv_data_version;
-
- cl_io_fini(env, io);
-
- if (unlikely(io->ci_need_restart))
- goto restart;
-
- cl_env_put(env, &refcheck);
-
- return result;
-}
-
-/*
- * Trigger a HSM release request for the provided inode.
- */
-int ll_hsm_release(struct inode *inode)
-{
- struct lu_env *env;
- struct obd_client_handle *och = NULL;
- __u64 data_version = 0;
- int rc;
- u16 refcheck;
-
- CDEBUG(D_INODE, "%s: Releasing file " DFID ".\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&ll_i2info(inode)->lli_fid));
-
- och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
- if (IS_ERR(och)) {
- rc = PTR_ERR(och);
- goto out;
- }
-
- /* Grab latest data_version and [am]time values */
- rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
- if (rc != 0)
- goto out;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- rc = PTR_ERR(env);
- goto out;
- }
-
- ll_merge_attr(env, inode);
- cl_env_put(env, &refcheck);
-
- /* Release the file.
- * NB: lease lock handle is released in mdc_hsm_release_pack() because
- * we still need it to pack l_remote_handle to MDT.
- */
- rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
- &data_version);
- och = NULL;
-
-out:
- if (och && !IS_ERR(och)) /* close the file */
- ll_lease_close(och, inode, NULL);
-
- return rc;
-}
-
-struct ll_swap_stack {
- u64 dv1;
- u64 dv2;
- struct inode *inode1;
- struct inode *inode2;
- bool check_dv1;
- bool check_dv2;
-};
-
-static int ll_swap_layouts(struct file *file1, struct file *file2,
- struct lustre_swap_layouts *lsl)
-{
- struct mdc_swap_layouts msl;
- struct md_op_data *op_data;
- __u32 gid;
- __u64 dv;
- struct ll_swap_stack *llss = NULL;
- int rc;
-
- llss = kzalloc(sizeof(*llss), GFP_NOFS);
- if (!llss)
- return -ENOMEM;
-
- llss->inode1 = file_inode(file1);
- llss->inode2 = file_inode(file2);
-
- rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
- if (rc < 0)
- goto free;
-
- /* we use 2 bool because it is easier to swap than 2 bits */
- if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
- llss->check_dv1 = true;
-
- if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
- llss->check_dv2 = true;
-
- /* we cannot use lsl->sl_dvX directly because we may swap them */
- llss->dv1 = lsl->sl_dv1;
- llss->dv2 = lsl->sl_dv2;
-
- rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
- if (!rc) /* same file, done! */
- goto free;
-
- if (rc < 0) { /* sequentialize it */
- swap(llss->inode1, llss->inode2);
- swap(file1, file2);
- swap(llss->dv1, llss->dv2);
- swap(llss->check_dv1, llss->check_dv2);
- }
-
- gid = lsl->sl_gid;
- if (gid != 0) { /* application asks to flush dirty cache */
- rc = ll_get_grouplock(llss->inode1, file1, gid);
- if (rc < 0)
- goto free;
-
- rc = ll_get_grouplock(llss->inode2, file2, gid);
- if (rc < 0) {
- ll_put_grouplock(llss->inode1, file1, gid);
- goto free;
- }
- }
-
- /* ultimate check, before swapping the layouts we check if
- * dataversion has changed (if requested)
- */
- if (llss->check_dv1) {
- rc = ll_data_version(llss->inode1, &dv, 0);
- if (rc)
- goto putgl;
- if (dv != llss->dv1) {
- rc = -EAGAIN;
- goto putgl;
- }
- }
-
- if (llss->check_dv2) {
- rc = ll_data_version(llss->inode2, &dv, 0);
- if (rc)
- goto putgl;
- if (dv != llss->dv2) {
- rc = -EAGAIN;
- goto putgl;
- }
- }
-
- /* struct md_op_data is used to send the swap args to the mdt
- * only flags is missing, so we use struct mdc_swap_layouts
- * through the md_op_data->op_data
- */
- /* flags from user space have to be converted before they are send to
- * server, no flag is sent today, they are only used on the client
- */
- msl.msl_flags = 0;
- rc = -ENOMEM;
- op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
- 0, LUSTRE_OPC_ANY, &msl);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto free;
- }
-
- rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
- sizeof(*op_data), op_data, NULL);
- ll_finish_md_op_data(op_data);
-
-putgl:
- if (gid != 0) {
- ll_put_grouplock(llss->inode2, file2, gid);
- ll_put_grouplock(llss->inode1, file1, gid);
- }
-
-free:
- kfree(llss);
-
- return rc;
-}
-
-int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
-{
- struct md_op_data *op_data;
- int rc;
-
- /* Detect out-of range masks */
- if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
- return -EINVAL;
-
- /* Non-root users are forbidden to set or clear flags which are
- * NOT defined in HSM_USER_MASK.
- */
- if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- /* Detect out-of range archive id */
- if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
- (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
- return -EINVAL;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hss);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
- sizeof(*op_data), op_data, NULL);
-
- ll_finish_md_op_data(op_data);
-
- return rc;
-}
-
-static int ll_hsm_import(struct inode *inode, struct file *file,
- struct hsm_user_import *hui)
-{
- struct hsm_state_set *hss = NULL;
- struct iattr *attr = NULL;
- int rc;
-
- if (!S_ISREG(inode->i_mode))
- return -EINVAL;
-
- /* set HSM flags */
- hss = kzalloc(sizeof(*hss), GFP_NOFS);
- if (!hss)
- return -ENOMEM;
-
- hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
- hss->hss_archive_id = hui->hui_archive_id;
- hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
- rc = ll_hsm_state_set(inode, hss);
- if (rc != 0)
- goto free_hss;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr) {
- rc = -ENOMEM;
- goto free_hss;
- }
-
- attr->ia_mode = hui->hui_mode & 0777;
- attr->ia_mode |= S_IFREG;
- attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
- attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
- attr->ia_size = hui->hui_size;
- attr->ia_mtime.tv_sec = hui->hui_mtime;
- attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
- attr->ia_atime.tv_sec = hui->hui_atime;
- attr->ia_atime.tv_nsec = hui->hui_atime_ns;
-
- attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
- ATTR_UID | ATTR_GID |
- ATTR_MTIME | ATTR_MTIME_SET |
- ATTR_ATIME | ATTR_ATIME_SET;
-
- inode_lock(inode);
-
- rc = ll_setattr_raw(file->f_path.dentry, attr, true);
- if (rc == -ENODATA)
- rc = 0;
-
- inode_unlock(inode);
-
- kfree(attr);
-free_hss:
- kfree(hss);
- return rc;
-}
-
-static inline long ll_lease_type_from_fmode(fmode_t fmode)
-{
- return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
- ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
-}
-
-static long
-ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct inode *inode = file_inode(file);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int flags, rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),cmd=%x\n",
- PFID(ll_inode2fid(inode)), inode, cmd);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
-
- /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
- if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
- return -ENOTTY;
-
- switch (cmd) {
- case LL_IOC_GETFLAGS:
- /* Get the current value of the file flags */
- return put_user(fd->fd_flags, (int __user *)arg);
- case LL_IOC_SETFLAGS:
- case LL_IOC_CLRFLAGS:
- /* Set or clear specific file flags */
- /* XXX This probably needs checks to ensure the flags are
- * not abused, and to handle any flag side effects.
- */
- if (get_user(flags, (int __user *)arg))
- return -EFAULT;
-
- if (cmd == LL_IOC_SETFLAGS) {
- if ((flags & LL_FILE_IGNORE_LOCK) &&
- !(file->f_flags & O_DIRECT)) {
- CERROR("%s: unable to disable locking on non-O_DIRECT file\n",
- current->comm);
- return -EINVAL;
- }
-
- fd->fd_flags |= flags;
- } else {
- fd->fd_flags &= ~flags;
- }
- return 0;
- case LL_IOC_LOV_SETSTRIPE:
- return ll_lov_setstripe(inode, file, arg);
- case LL_IOC_LOV_SETEA:
- return ll_lov_setea(inode, file, arg);
- case LL_IOC_LOV_SWAP_LAYOUTS: {
- struct file *file2;
- struct lustre_swap_layouts lsl;
-
- if (copy_from_user(&lsl, (char __user *)arg,
- sizeof(struct lustre_swap_layouts)))
- return -EFAULT;
-
- if ((file->f_flags & O_ACCMODE) == O_RDONLY)
- return -EPERM;
-
- file2 = fget(lsl.sl_fd);
- if (!file2)
- return -EBADF;
-
- /* O_WRONLY or O_RDWR */
- if ((file2->f_flags & O_ACCMODE) == O_RDONLY) {
- rc = -EPERM;
- goto out;
- }
-
- if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
- struct obd_client_handle *och = NULL;
- struct ll_inode_info *lli;
- struct inode *inode2;
-
- if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) {
- rc = -EINVAL;
- goto out;
- }
-
- lli = ll_i2info(inode);
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (!och) {
- rc = -ENOLCK;
- goto out;
- }
- inode2 = file_inode(file2);
- rc = ll_swap_layouts_close(och, inode, inode2);
- } else {
- rc = ll_swap_layouts(file, file2, &lsl);
- }
-out:
- fput(file2);
- return rc;
- }
- case LL_IOC_LOV_GETSTRIPE:
- return ll_file_getstripe(inode,
- (struct lov_user_md __user *)arg);
- case FSFILT_IOC_GETFLAGS:
- case FSFILT_IOC_SETFLAGS:
- return ll_iocontrol(inode, file, cmd, arg);
- case FSFILT_IOC_GETVERSION_OLD:
- case FSFILT_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
- case LL_IOC_GROUP_LOCK:
- return ll_get_grouplock(inode, file, arg);
- case LL_IOC_GROUP_UNLOCK:
- return ll_put_grouplock(inode, file, arg);
- case IOC_OBD_STATFS:
- return ll_obd_statfs(inode, (void __user *)arg);
-
- /* We need to special case any other ioctls we want to handle,
- * to send them to the MDS/OST as appropriate and to properly
- * network encode the arg field.
- case FSFILT_IOC_SETVERSION_OLD:
- case FSFILT_IOC_SETVERSION:
- */
- case LL_IOC_FLUSHCTX:
- return ll_flush_ctx(inode);
- case LL_IOC_PATH2FID: {
- if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
- sizeof(struct lu_fid)))
- return -EFAULT;
-
- return 0;
- }
- case LL_IOC_GETPARENT:
- return ll_getparent(file, (struct getparent __user *)arg);
- case OBD_IOC_FID2PATH:
- return ll_fid2path(inode, (void __user *)arg);
- case LL_IOC_DATA_VERSION: {
- struct ioc_data_version idv;
- int rc;
-
- if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
- return -EFAULT;
-
- idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
- rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
- if (rc == 0 && copy_to_user((char __user *)arg, &idv,
- sizeof(idv)))
- return -EFAULT;
-
- return rc;
- }
-
- case LL_IOC_GET_MDTIDX: {
- int mdtidx;
-
- mdtidx = ll_get_mdt_idx(inode);
- if (mdtidx < 0)
- return mdtidx;
-
- if (put_user(mdtidx, (int __user *)arg))
- return -EFAULT;
-
- return 0;
- }
- case OBD_IOC_GETDTNAME:
- case OBD_IOC_GETMDNAME:
- return ll_get_obd_name(inode, cmd, arg);
- case LL_IOC_HSM_STATE_GET: {
- struct md_op_data *op_data;
- struct hsm_user_state *hus;
- int rc;
-
- hus = kzalloc(sizeof(*hus), GFP_NOFS);
- if (!hus)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hus);
- if (IS_ERR(op_data)) {
- kfree(hus);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
- rc = -EFAULT;
-
- ll_finish_md_op_data(op_data);
- kfree(hus);
- return rc;
- }
- case LL_IOC_HSM_STATE_SET: {
- struct hsm_state_set *hss;
- int rc;
-
- hss = memdup_user((char __user *)arg, sizeof(*hss));
- if (IS_ERR(hss))
- return PTR_ERR(hss);
-
- rc = ll_hsm_state_set(inode, hss);
-
- kfree(hss);
- return rc;
- }
- case LL_IOC_HSM_ACTION: {
- struct md_op_data *op_data;
- struct hsm_current_action *hca;
- int rc;
-
- hca = kzalloc(sizeof(*hca), GFP_NOFS);
- if (!hca)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hca);
- if (IS_ERR(op_data)) {
- kfree(hca);
- return PTR_ERR(op_data);
- }
-
- rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
- op_data, NULL);
-
- if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
- rc = -EFAULT;
-
- ll_finish_md_op_data(op_data);
- kfree(hca);
- return rc;
- }
- case LL_IOC_SET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle *och = NULL;
- bool lease_broken;
- fmode_t fmode;
-
- switch (arg) {
- case LL_LEASE_WRLCK:
- if (!(file->f_mode & FMODE_WRITE))
- return -EPERM;
- fmode = FMODE_WRITE;
- break;
- case LL_LEASE_RDLCK:
- if (!(file->f_mode & FMODE_READ))
- return -EPERM;
- fmode = FMODE_READ;
- break;
- case LL_LEASE_UNLCK:
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
-
- if (!och)
- return -ENOLCK;
-
- fmode = och->och_flags;
- rc = ll_lease_close(och, inode, &lease_broken);
- if (rc < 0)
- return rc;
-
- if (lease_broken)
- fmode = 0;
-
- return ll_lease_type_from_fmode(fmode);
- default:
- return -EINVAL;
- }
-
- CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
-
- /* apply for lease */
- och = ll_lease_open(inode, file, fmode, 0);
- if (IS_ERR(och))
- return PTR_ERR(och);
-
- rc = 0;
- mutex_lock(&lli->lli_och_mutex);
- if (!fd->fd_lease_och) {
- fd->fd_lease_och = och;
- och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (och) {
- /* impossible now that only excl is supported for now */
- ll_lease_close(och, inode, &lease_broken);
- rc = -EBUSY;
- }
- return rc;
- }
- case LL_IOC_GET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_lock *lock = NULL;
- fmode_t fmode = 0;
-
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och) {
- struct obd_client_handle *och = fd->fd_lease_och;
-
- lock = ldlm_handle2lock(&och->och_lease_handle);
- if (lock) {
- lock_res_and_lock(lock);
- if (!ldlm_is_cancel(lock))
- fmode = och->och_flags;
- unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
- }
- }
- mutex_unlock(&lli->lli_och_mutex);
- return ll_lease_type_from_fmode(fmode);
- }
- case LL_IOC_HSM_IMPORT: {
- struct hsm_user_import *hui;
-
- hui = memdup_user((void __user *)arg, sizeof(*hui));
- if (IS_ERR(hui))
- return PTR_ERR(hui);
-
- rc = ll_hsm_import(inode, file, hui);
-
- kfree(hui);
- return rc;
- }
- default: {
- int err;
-
- if (ll_iocontrol_call(inode, file, cmd, arg, &err) ==
- LLIOC_STOP)
- return err;
-
- return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
- (void __user *)arg);
- }
- }
-}
-
-static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file_inode(file);
- loff_t retval, eof = 0;
-
- retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
- (origin == SEEK_CUR) ? file->f_pos : 0);
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), to=%llu=%#llx(%d)\n",
- PFID(ll_inode2fid(inode)), inode, retval, retval, origin);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-
- if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
- retval = ll_glimpse_size(inode);
- if (retval != 0)
- return retval;
- eof = i_size_read(inode);
- }
-
- return generic_file_llseek_size(file, offset, origin,
- ll_file_maxbytes(inode), eof);
-}
-
-static int ll_flush(struct file *file, fl_owner_t id)
-{
- struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int rc, err;
-
- LASSERT(!S_ISDIR(inode->i_mode));
-
- /* catch async errors that were recorded back when async writeback
- * failed for pages in this mapping.
- */
- rc = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (lli->lli_clob) {
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (!rc)
- rc = err;
- }
-
- /* The application has been told about write failure already.
- * Do not report failure again.
- */
- if (fd->fd_write_failed)
- return 0;
- return rc ? -EIO : 0;
-}
-
-/**
- * Called to make sure a portion of file has been written out.
- * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
- *
- * Return how many pages have been written.
- */
-int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct cl_fsync_io *fio;
- int result;
- u16 refcheck;
-
- if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
- mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
- return -EINVAL;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
- io->ci_ignore_layout = ignore_layout;
-
- /* initialize parameters for sync */
- fio = &io->u.ci_fsync;
- fio->fi_start = start;
- fio->fi_end = end;
- fio->fi_fid = ll_inode2fid(inode);
- fio->fi_mode = mode;
- fio->fi_nr_written = 0;
-
- if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
- result = cl_io_loop(env, io);
- else
- result = io->ci_result;
- if (result == 0)
- result = fio->fi_nr_written;
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
-
- return result;
-}
-
-int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ptlrpc_request *req;
- int rc, err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
-
- rc = file_write_and_wait_range(file, start, end);
- inode_lock(inode);
-
- /* catch async errors that were recorded back when async writeback
- * failed for pages in this mapping.
- */
- if (!S_ISDIR(inode->i_mode)) {
- err = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (rc == 0)
- rc = err;
- if (lli->lli_clob) {
- err = lov_read_and_clear_async_rc(lli->lli_clob);
- if (rc == 0)
- rc = err;
- }
- }
-
- err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
- if (!rc)
- rc = err;
- if (!err)
- ptlrpc_req_finished(req);
-
- if (S_ISREG(inode->i_mode)) {
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
- if (rc == 0 && err < 0)
- rc = err;
- if (rc < 0)
- fd->fd_write_failed = true;
- else
- fd->fd_write_failed = false;
- }
-
- inode_unlock(inode);
- return rc;
-}
-
-static int
-ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
-{
- struct inode *inode = file_inode(file);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_FLOCK,
- .ei_cb_cp = ldlm_flock_completion_ast,
- .ei_cbdata = file_lock,
- };
- struct md_op_data *op_data;
- struct lustre_handle lockh = {0};
- union ldlm_policy_data flock = { { 0 } };
- int fl_type = file_lock->fl_type;
- __u64 flags = 0;
- int rc;
- int rc2 = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID " file_lock=%p\n",
- PFID(ll_inode2fid(inode)), file_lock);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
-
- if (file_lock->fl_flags & FL_FLOCK)
- LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
- else if (!(file_lock->fl_flags & FL_POSIX))
- return -EINVAL;
-
- flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
- flock.l_flock.pid = file_lock->fl_pid;
- flock.l_flock.start = file_lock->fl_start;
- flock.l_flock.end = file_lock->fl_end;
-
- /* Somewhat ugly workaround for svc lockd.
- * lockd installs custom fl_lmops->lm_compare_owner that checks
- * for the fl_owner to be the same (which it always is on local node
- * I guess between lockd processes) and then compares pid.
- * As such we assign pid to the owner field to make it all work,
- * conflict with normal locks is unlikely since pid space and
- * pointer space for current->files are not intersecting
- */
- if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
- flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
-
- switch (fl_type) {
- case F_RDLCK:
- einfo.ei_mode = LCK_PR;
- break;
- case F_UNLCK:
- /* An unlock request may or may not have any relation to
- * existing locks so we may not be able to pass a lock handle
- * via a normal ldlm_lock_cancel() request. The request may even
- * unlock a byte range in the middle of an existing lock. In
- * order to process an unlock request we need all of the same
- * information that is given with a normal read or write record
- * lock request. To avoid creating another ldlm unlock (cancel)
- * message we'll treat a LCK_NL flock request as an unlock.
- */
- einfo.ei_mode = LCK_NL;
- break;
- case F_WRLCK:
- einfo.ei_mode = LCK_PW;
- break;
- default:
- CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
- return -ENOTSUPP;
- }
-
- switch (cmd) {
- case F_SETLKW:
-#ifdef F_SETLKW64
- case F_SETLKW64:
-#endif
- flags = 0;
- break;
- case F_SETLK:
-#ifdef F_SETLK64
- case F_SETLK64:
-#endif
- flags = LDLM_FL_BLOCK_NOWAIT;
- break;
- case F_GETLK:
-#ifdef F_GETLK64
- case F_GETLK64:
-#endif
- flags = LDLM_FL_TEST_LOCK;
- break;
- default:
- CERROR("unknown fcntl lock command: %d\n", cmd);
- return -EINVAL;
- }
-
- /*
- * Save the old mode so that if the mode in the lock changes we
- * can decrement the appropriate reader or writer refcount.
- */
- file_lock->fl_type = einfo.ei_mode;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- CDEBUG(D_DLMTRACE, "inode=" DFID ", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n",
- PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
- einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
-
- rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
- flags);
-
- /* Restore the file lock type if not TEST lock. */
- if (!(flags & LDLM_FL_TEST_LOCK))
- file_lock->fl_type = fl_type;
-
- if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
- !(flags & LDLM_FL_TEST_LOCK))
- rc2 = locks_lock_file_wait(file, file_lock);
-
- if (rc2 && file_lock->fl_type != F_UNLCK) {
- einfo.ei_mode = LCK_NL;
- md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
- &lockh, flags);
- rc = rc2;
- }
-
- ll_finish_md_op_data(op_data);
-
- return rc;
-}
-
-int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid,
- struct inode **inode)
-{
- struct md_op_data *op_data = NULL;
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc;
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
- rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc < 0)
- return rc;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out_req;
- }
- if (fid)
- *fid = body->mbo_fid1;
-
- if (inode)
- rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
-out_req:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
- const char *name, int namelen)
-{
- struct ptlrpc_request *request = NULL;
- struct obd_client_handle *och = NULL;
- struct inode *child_inode = NULL;
- struct dentry *dchild = NULL;
- struct md_op_data *op_data;
- struct mdt_body *body;
- u64 data_version = 0;
- struct qstr qstr;
- int rc;
-
- CDEBUG(D_VFSTRACE, "migrate %s under " DFID " to MDT%d\n",
- name, PFID(ll_inode2fid(parent)), mdtidx);
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* Get child FID first */
- qstr.hash = full_name_hash(parent, name, namelen);
- qstr.name = name;
- qstr.len = namelen;
- dchild = d_lookup(file_dentry(file), &qstr);
- if (dchild) {
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
- if (dchild->d_inode)
- child_inode = igrab(dchild->d_inode);
- dput(dchild);
- }
-
- if (!child_inode) {
- rc = ll_get_fid_by_name(parent, name, namelen,
- &op_data->op_fid3, &child_inode);
- if (rc)
- goto out_free;
- }
-
- if (!child_inode) {
- rc = -EINVAL;
- goto out_free;
- }
-
- inode_lock(child_inode);
- op_data->op_fid3 = *ll_inode2fid(child_inode);
- if (!fid_is_sane(&op_data->op_fid3)) {
- CERROR("%s: migrate %s, but fid " DFID " is insane\n",
- ll_get_fsname(parent->i_sb, NULL, 0), name,
- PFID(&op_data->op_fid3));
- rc = -EINVAL;
- goto out_unlock;
- }
-
- rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
- if (rc < 0)
- goto out_unlock;
-
- if (rc == mdtidx) {
- CDEBUG(D_INFO, "%s: " DFID " is already on MDT%d.\n", name,
- PFID(&op_data->op_fid3), mdtidx);
- rc = 0;
- goto out_unlock;
- }
-again:
- if (S_ISREG(child_inode->i_mode)) {
- och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
- if (IS_ERR(och)) {
- rc = PTR_ERR(och);
- och = NULL;
- goto out_unlock;
- }
-
- rc = ll_data_version(child_inode, &data_version,
- LL_DV_WR_FLUSH);
- if (rc)
- goto out_close;
-
- op_data->op_handle = och->och_fh;
- op_data->op_data = och->och_mod;
- op_data->op_data_version = data_version;
- op_data->op_lease_handle = och->och_lease_handle;
- op_data->op_bias |= MDS_RENAME_MIGRATE;
- }
-
- op_data->op_mds = mdtidx;
- op_data->op_cli_flags = CLI_MIGRATE;
- rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
- namelen, name, namelen, &request);
- if (!rc) {
- LASSERT(request);
- ll_update_times(request, parent);
-
- body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
-
- /*
- * If the server does release layout lock, then we cleanup
- * the client och here, otherwise release it in out_close:
- */
- if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
- obd_mod_put(och->och_mod);
- md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
- och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- kfree(och);
- och = NULL;
- }
- }
-
- if (request) {
- ptlrpc_req_finished(request);
- request = NULL;
- }
-
- /* Try again if the file layout has changed. */
- if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
- goto again;
-
-out_close:
- if (och) /* close the file */
- ll_lease_close(och, child_inode, NULL);
- if (!rc)
- clear_nlink(child_inode);
-out_unlock:
- inode_unlock(child_inode);
- iput(child_inode);
-out_free:
- ll_finish_md_op_data(op_data);
- return rc;
-}
-
-static int
-ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
-{
- return -ENOSYS;
-}
-
-/**
- * test if some locks matching bits and l_req_mode are acquired
- * - bits can be in different locks
- * - if found clear the common lock bits in *bits
- * - the bits not found, are kept in *bits
- * \param inode [IN]
- * \param bits [IN] searched lock bits [IN]
- * \param l_req_mode [IN] searched lock mode
- * \retval boolean, true iff all bits are found
- */
-int ll_have_md_lock(struct inode *inode, __u64 *bits,
- enum ldlm_mode l_req_mode)
-{
- struct lustre_handle lockh;
- union ldlm_policy_data policy;
- enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
- (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
- struct lu_fid *fid;
- __u64 flags;
- int i;
-
- if (!inode)
- return 0;
-
- fid = &ll_i2info(inode)->lli_fid;
- CDEBUG(D_INFO, "trying to match res " DFID " mode %s\n", PFID(fid),
- ldlm_lockname[mode]);
-
- flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
- for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
- policy.l_inodebits.bits = *bits & (1 << i);
- if (policy.l_inodebits.bits == 0)
- continue;
-
- if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
- &policy, mode, &lockh)) {
- struct ldlm_lock *lock;
-
- lock = ldlm_handle2lock(&lockh);
- if (lock) {
- *bits &=
- ~(lock->l_policy_data.l_inodebits.bits);
- LDLM_LOCK_PUT(lock);
- } else {
- *bits &= ~policy.l_inodebits.bits;
- }
- }
- }
- return *bits == 0;
-}
-
-enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags,
- enum ldlm_mode mode)
-{
- union ldlm_policy_data policy = { .l_inodebits = { bits } };
- struct lu_fid *fid;
-
- fid = &ll_i2info(inode)->lli_fid;
- CDEBUG(D_INFO, "trying to match res " DFID "\n", PFID(fid));
-
- return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
- fid, LDLM_IBITS, &policy, mode, lockh);
-}
-
-static int ll_inode_revalidate_fini(struct inode *inode, int rc)
-{
- /* Already unlinked. Just update nlink and return success */
- if (rc == -ENOENT) {
- clear_nlink(inode);
- /* If it is striped directory, and there is bad stripe
- * Let's revalidate the dentry again, instead of returning
- * error
- */
- if (S_ISDIR(inode->i_mode) && ll_i2info(inode)->lli_lsm_md)
- return 0;
-
- /* This path cannot be hit for regular files unless in
- * case of obscure races, so no need to validate size.
- */
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return 0;
- } else if (rc != 0) {
- CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
- "%s: revalidate FID " DFID " error: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- }
-
- return rc;
-}
-
-static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
-{
- struct inode *inode = d_inode(dentry);
- struct ptlrpc_request *req = NULL;
- struct obd_export *exp;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),name=%pd\n",
- PFID(ll_inode2fid(inode)), inode, dentry);
-
- exp = ll_i2mdexp(inode);
-
- /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
- * But under CMD case, it caused some lock issues, should be fixed
- * with new CMD ibits lock. See bug 12718
- */
- if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
- struct lookup_intent oit = { .it_op = IT_GETATTR };
- struct md_op_data *op_data;
-
- if (ibits == MDS_INODELOCK_LOOKUP)
- oit.it_op = IT_LOOKUP;
-
- /* Call getattr by fid, so do not provide name at all. */
- op_data = ll_prep_md_op_data(NULL, inode,
- inode, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_intent_lock(exp, op_data, &oit, &req,
- &ll_md_blocking_ast, 0);
- ll_finish_md_op_data(op_data);
- if (rc < 0) {
- rc = ll_inode_revalidate_fini(inode, rc);
- goto out;
- }
-
- rc = ll_revalidate_it_finish(req, &oit, inode);
- if (rc != 0) {
- ll_intent_release(&oit);
- goto out;
- }
-
- /* Unlinked? Unhash dentry, so it is not picked up later by
- * do_lookup() -> ll_revalidate_it(). We cannot use d_drop
- * here to preserve get_cwd functionality on 2.6.
- * Bug 10503
- */
- if (!d_inode(dentry)->i_nlink) {
- spin_lock(&inode->i_lock);
- d_lustre_invalidate(dentry, 0);
- spin_unlock(&inode->i_lock);
- }
-
- ll_lookup_finish_locks(&oit, inode);
- } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
- struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
- u64 valid = OBD_MD_FLGETATTR;
- struct md_op_data *op_data;
- int ealen = 0;
-
- if (S_ISREG(inode->i_mode)) {
- rc = ll_get_default_mdsize(sbi, &ealen);
- if (rc)
- return rc;
- valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, ealen, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = valid;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc)
- return ll_inode_revalidate_fini(inode, rc);
-
- rc = ll_prep_inode(&inode, req, NULL, NULL);
- }
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int ll_merge_md_attr(struct inode *inode)
-{
- struct cl_attr attr = { 0 };
- int rc;
-
- LASSERT(ll_i2info(inode)->lli_lsm_md);
- rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
- &attr, ll_md_blocking_ast);
- if (rc)
- return rc;
-
- set_nlink(inode, attr.cat_nlink);
- inode->i_blocks = attr.cat_blocks;
- i_size_write(inode, attr.cat_size);
-
- ll_i2info(inode)->lli_atime = attr.cat_atime;
- ll_i2info(inode)->lli_mtime = attr.cat_mtime;
- ll_i2info(inode)->lli_ctime = attr.cat_ctime;
-
- return 0;
-}
-
-static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
-{
- struct inode *inode = d_inode(dentry);
- int rc;
-
- rc = __ll_inode_revalidate(dentry, ibits);
- if (rc != 0)
- return rc;
-
- /* if object isn't regular file, don't validate size */
- if (!S_ISREG(inode->i_mode)) {
- if (S_ISDIR(inode->i_mode) &&
- ll_i2info(inode)->lli_lsm_md) {
- rc = ll_merge_md_attr(inode);
- if (rc)
- return rc;
- }
-
- LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
- LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
- LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
- } else {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- /* In case of restore, the MDT has the right size and has
- * already send it back without granting the layout lock,
- * inode is up-to-date so glimpse is useless.
- * Also to glimpse we need the layout, in case of a running
- * restore the MDT holds the layout lock so the glimpse will
- * block up to the end of restore (getattr will block)
- */
- if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags))
- rc = ll_glimpse_size(inode);
- }
- return rc;
-}
-
-int ll_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags)
-{
- struct inode *inode = d_inode(path->dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int res;
-
- res = ll_inode_revalidate(path->dentry,
- MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP);
- ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
-
- if (res)
- return res;
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
-
- stat->dev = inode->i_sb->s_dev;
- if (ll_need_32bit_api(sbi))
- stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
- else
- stat->ino = inode->i_ino;
- stat->mode = inode->i_mode;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
- stat->rdev = inode->i_rdev;
- stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
- stat->blksize = 1 << inode->i_blkbits;
-
- stat->nlink = inode->i_nlink;
- stat->size = i_size_read(inode);
- stat->blocks = inode->i_blocks;
-
- return 0;
-}
-
-static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len)
-{
- int rc;
- size_t num_bytes;
- struct fiemap *fiemap;
- unsigned int extent_count = fieinfo->fi_extents_max;
-
- num_bytes = sizeof(*fiemap) + (extent_count *
- sizeof(struct fiemap_extent));
- fiemap = kvzalloc(num_bytes, GFP_KERNEL);
- if (!fiemap)
- return -ENOMEM;
-
- fiemap->fm_flags = fieinfo->fi_flags;
- fiemap->fm_extent_count = fieinfo->fi_extents_max;
- fiemap->fm_start = start;
- fiemap->fm_length = len;
-
- if (extent_count > 0 &&
- copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
- sizeof(struct fiemap_extent))) {
- rc = -EFAULT;
- goto out;
- }
-
- rc = ll_do_fiemap(inode, fiemap, num_bytes);
-
- fieinfo->fi_flags = fiemap->fm_flags;
- fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
- if (extent_count > 0 &&
- copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
- fiemap->fm_mapped_extents *
- sizeof(struct fiemap_extent))) {
- rc = -EFAULT;
- goto out;
- }
-out:
- kvfree(fiemap);
- return rc;
-}
-
-struct posix_acl *ll_get_acl(struct inode *inode, int type)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct posix_acl *acl = NULL;
-
- spin_lock(&lli->lli_lock);
- /* VFS' acl_permission_check->check_acl will release the refcount */
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- return acl;
-}
-
-int ll_inode_permission(struct inode *inode, int mask)
-{
- struct ll_sb_info *sbi;
- struct root_squash_info *squash;
- const struct cred *old_cred = NULL;
- struct cred *cred = NULL;
- bool squash_id = false;
- cfs_cap_t cap;
- int rc = 0;
-
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
- /* as root inode are NOT getting validated in lookup operation,
- * need to do it before permission check.
- */
-
- if (is_root_inode(inode)) {
- rc = __ll_inode_revalidate(inode->i_sb->s_root,
- MDS_INODELOCK_LOOKUP);
- if (rc)
- return rc;
- }
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), inode mode %x mask %o\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
-
- /* squash fsuid/fsgid if needed */
- sbi = ll_i2sbi(inode);
- squash = &sbi->ll_squash;
- if (unlikely(squash->rsi_uid &&
- uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
- !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
- squash_id = true;
- }
-
- if (squash_id) {
- CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
- __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
- squash->rsi_uid, squash->rsi_gid);
-
- /*
- * update current process's credentials
- * and FS capability
- */
- cred = prepare_creds();
- if (!cred)
- return -ENOMEM;
-
- cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
- cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
- for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
- if ((1 << cap) & CFS_CAP_FS_MASK)
- cap_lower(cred->cap_effective, cap);
- }
- old_cred = override_creds(cred);
- }
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
- rc = generic_permission(inode, mask);
-
- /* restore current process's credentials and FS capability */
- if (squash_id) {
- revert_creds(old_cred);
- put_cred(cred);
- }
-
- return rc;
-}
-
-/* -o localflock - only provides locally consistent flock locks */
-const struct file_operations ll_file_operations = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush
-};
-
-const struct file_operations ll_file_operations_flock = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush,
- .flock = ll_file_flock,
- .lock = ll_file_flock
-};
-
-/* These are for -o noflock - to return ENOSYS on flock calls */
-const struct file_operations ll_file_operations_noflock = {
- .read_iter = ll_file_read_iter,
- .write_iter = ll_file_write_iter,
- .unlocked_ioctl = ll_file_ioctl,
- .open = ll_file_open,
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
- .splice_read = generic_file_splice_read,
- .fsync = ll_fsync,
- .flush = ll_flush,
- .flock = ll_file_noflock,
- .lock = ll_file_noflock
-};
-
-const struct inode_operations ll_file_inode_operations = {
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .fiemap = ll_fiemap,
- .get_acl = ll_get_acl,
-};
-
-/* dynamic ioctl number support routines */
-static struct llioc_ctl_data {
- struct rw_semaphore ioc_sem;
- struct list_head ioc_head;
-} llioc = {
- __RWSEM_INITIALIZER(llioc.ioc_sem),
- LIST_HEAD_INIT(llioc.ioc_head)
-};
-
-struct llioc_data {
- struct list_head iocd_list;
- unsigned int iocd_size;
- llioc_callback_t iocd_cb;
- unsigned int iocd_count;
- unsigned int iocd_cmd[0];
-};
-
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-{
- unsigned int size;
- struct llioc_data *in_data = NULL;
-
- if (!cb || !cmd || count > LLIOC_MAX_CMD || count < 0)
- return NULL;
-
- size = sizeof(*in_data) + count * sizeof(unsigned int);
- in_data = kzalloc(size, GFP_NOFS);
- if (!in_data)
- return NULL;
-
- in_data->iocd_size = size;
- in_data->iocd_cb = cb;
- in_data->iocd_count = count;
- memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-
- down_write(&llioc.ioc_sem);
- list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
- up_write(&llioc.ioc_sem);
-
- return in_data;
-}
-EXPORT_SYMBOL(ll_iocontrol_register);
-
-void ll_iocontrol_unregister(void *magic)
-{
- struct llioc_data *tmp;
-
- if (!magic)
- return;
-
- down_write(&llioc.ioc_sem);
- list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
- if (tmp == magic) {
- list_del(&tmp->iocd_list);
- up_write(&llioc.ioc_sem);
-
- kfree(tmp);
- return;
- }
- }
- up_write(&llioc.ioc_sem);
-
- CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-}
-EXPORT_SYMBOL(ll_iocontrol_unregister);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg, int *rcp)
-{
- enum llioc_iter ret = LLIOC_CONT;
- struct llioc_data *data;
- int rc = -EINVAL, i;
-
- down_read(&llioc.ioc_sem);
- list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
- for (i = 0; i < data->iocd_count; i++) {
- if (cmd != data->iocd_cmd[i])
- continue;
-
- ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
- break;
- }
-
- if (ret == LLIOC_STOP)
- break;
- }
- up_read(&llioc.ioc_sem);
-
- if (rcp)
- *rcp = rc;
- return ret;
-}
-
-int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct lu_env *env;
- int rc;
- u16 refcheck;
-
- if (!obj)
- return 0;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_conf_set(env, obj, conf);
- if (rc < 0)
- goto out;
-
- if (conf->coc_opc == OBJECT_CONF_SET) {
- struct ldlm_lock *lock = conf->coc_lock;
- struct cl_layout cl = {
- .cl_layout_gen = 0,
- };
-
- LASSERT(lock);
- LASSERT(ldlm_has_layout(lock));
-
- /* it can only be allowed to match after layout is
- * applied to inode otherwise false layout would be
- * seen. Applying layout should happen before dropping
- * the intent lock.
- */
- ldlm_lock_allow_match(lock);
-
- rc = cl_object_layout_get(env, obj, &cl);
- if (rc < 0)
- goto out;
-
- CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n",
- PFID(&lli->lli_fid), ll_layout_version_get(lli),
- cl.cl_layout_gen);
- ll_layout_version_set(lli, cl.cl_layout_gen);
- }
-out:
- cl_env_put(env, &refcheck);
- return rc;
-}
-
-/* Fetch layout from MDT with getxattr request, if it's not ready yet */
-static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
-
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req;
- struct mdt_body *body;
- void *lvbdata;
- void *lmm;
- int lmmsize;
- int rc;
-
- CDEBUG(D_INODE, DFID " LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
- PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
- lock->l_lvb_data, lock->l_lvb_len);
-
- if (lock->l_lvb_data && ldlm_is_lvb_ready(lock))
- return 0;
-
- /* if layout lock was granted right away, the layout is returned
- * within DLM_LVB of dlm reply; otherwise if the lock was ever
- * blocked and then granted via completion ast, we have to fetch
- * layout here. Please note that we can't use the LVB buffer in
- * completion AST because it doesn't have a large enough buffer
- */
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc == 0)
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
- lmmsize, 0, &req);
- if (rc < 0)
- return rc;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EPROTO;
- goto out;
- }
-
- lmmsize = body->mbo_eadatasize;
- if (lmmsize == 0) /* empty layout */ {
- rc = 0;
- goto out;
- }
-
- lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
- if (!lmm) {
- rc = -EFAULT;
- goto out;
- }
-
- lvbdata = kvzalloc(lmmsize, GFP_NOFS);
- if (!lvbdata) {
- rc = -ENOMEM;
- goto out;
- }
-
- memcpy(lvbdata, lmm, lmmsize);
- lock_res_and_lock(lock);
- if (lock->l_lvb_data)
- kvfree(lock->l_lvb_data);
-
- lock->l_lvb_data = lvbdata;
- lock->l_lvb_len = lmmsize;
- unlock_res_and_lock(lock);
-
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-/**
- * Apply the layout to the inode. Layout lock is held and will be released
- * in this function.
- */
-static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
- struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ldlm_lock *lock;
- struct cl_object_conf conf;
- int rc = 0;
- bool lvb_ready;
- bool wait_layout = false;
-
- LASSERT(lustre_handle_is_used(lockh));
-
- lock = ldlm_handle2lock(lockh);
- LASSERT(lock);
- LASSERT(ldlm_has_layout(lock));
-
- LDLM_DEBUG(lock, "File " DFID "(%p) being reconfigured",
- PFID(&lli->lli_fid), inode);
-
- /* in case this is a caching lock and reinstate with new inode */
- md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
-
- lock_res_and_lock(lock);
- lvb_ready = ldlm_is_lvb_ready(lock);
- unlock_res_and_lock(lock);
- /* checking lvb_ready is racy but this is okay. The worst case is
- * that multi processes may configure the file on the same time.
- */
- if (lvb_ready) {
- rc = 0;
- goto out;
- }
-
- rc = ll_layout_fetch(inode, lock);
- if (rc < 0)
- goto out;
-
- /* for layout lock, lmm is returned in lock's lvb.
- * lvb_data is immutable if the lock is held so it's safe to access it
- * without res lock.
- *
- * set layout to file. Unlikely this will fail as old layout was
- * surely eliminated
- */
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_SET;
- conf.coc_inode = inode;
- conf.coc_lock = lock;
- conf.u.coc_layout.lb_buf = lock->l_lvb_data;
- conf.u.coc_layout.lb_len = lock->l_lvb_len;
- rc = ll_layout_conf(inode, &conf);
-
- /* refresh layout failed, need to wait */
- wait_layout = rc == -EBUSY;
-
-out:
- LDLM_LOCK_PUT(lock);
- ldlm_lock_decref(lockh, mode);
-
- /* wait for IO to complete if it's still being used. */
- if (wait_layout) {
- CDEBUG(D_INODE, "%s: " DFID "(%p) wait for layout reconf\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), inode);
-
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_WAIT;
- conf.coc_inode = inode;
- rc = ll_layout_conf(inode, &conf);
- if (rc == 0)
- rc = -EAGAIN;
-
- CDEBUG(D_INODE,
- "%s: file=" DFID " waiting layout return: %d.\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), rc);
- }
- return rc;
-}
-
-static int ll_layout_refresh_locked(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct md_op_data *op_data;
- struct lookup_intent it;
- struct lustre_handle lockh;
- enum ldlm_mode mode;
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_IBITS,
- .ei_mode = LCK_CR,
- .ei_cb_bl = &ll_md_blocking_ast,
- .ei_cb_cp = &ldlm_completion_ast,
- };
- int rc;
-
-again:
- /* mostly layout lock is caching on the local side, so try to match
- * it before grabbing layout lock mutex.
- */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
- LCK_CR | LCK_CW | LCK_PR | LCK_PW);
- if (mode != 0) { /* hit cached lock */
- rc = ll_layout_lock_set(&lockh, mode, inode);
- if (rc == -EAGAIN)
- goto again;
- return rc;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
- 0, 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- /* have to enqueue one */
- memset(&it, 0, sizeof(it));
- it.it_op = IT_LAYOUT;
- lockh.cookie = 0ULL;
-
- LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file " DFID "(%p)",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(&lli->lli_fid), inode);
-
- rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
- ptlrpc_req_finished(it.it_request);
- it.it_request = NULL;
-
- ll_finish_md_op_data(op_data);
-
- mode = it.it_lock_mode;
- it.it_lock_mode = 0;
- ll_intent_drop_lock(&it);
-
- if (rc == 0) {
- /* set lock data in case this is a new lock */
- ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
- rc = ll_layout_lock_set(&lockh, mode, inode);
- if (rc == -EAGAIN)
- goto again;
- }
-
- return rc;
-}
-
-/**
- * This function checks if there exists a LAYOUT lock on the client side,
- * or enqueues it if it doesn't have one in cache.
- *
- * This function will not hold layout lock so it may be revoked any time after
- * this function returns. Any operations depend on layout should be redone
- * in that case.
- *
- * This function should be called before lov_io_init() to get an uptodate
- * layout version, the caller should save the version number and after IO
- * is finished, this function should be called again to verify that layout
- * is not changed during IO time.
- */
-int ll_layout_refresh(struct inode *inode, __u32 *gen)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc;
-
- *gen = ll_layout_version_get(lli);
- if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
- return 0;
-
- /* sanity checks */
- LASSERT(fid_is_sane(ll_inode2fid(inode)));
- LASSERT(S_ISREG(inode->i_mode));
-
- /* take layout lock mutex to enqueue layout lock exclusively. */
- mutex_lock(&lli->lli_layout_mutex);
-
- rc = ll_layout_refresh_locked(inode);
- if (rc < 0)
- goto out;
-
- *gen = ll_layout_version_get(lli);
-out:
- mutex_unlock(&lli->lli_layout_mutex);
-
- return rc;
-}
-
-/**
- * This function send a restore request to the MDT
- */
-int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
-{
- struct hsm_user_request *hur;
- int len, rc;
-
- len = sizeof(struct hsm_user_request) +
- sizeof(struct hsm_user_item);
- hur = kzalloc(len, GFP_NOFS);
- if (!hur)
- return -ENOMEM;
-
- hur->hur_request.hr_action = HUA_RESTORE;
- hur->hur_request.hr_archive_id = 0;
- hur->hur_request.hr_flags = 0;
- memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
- sizeof(hur->hur_user_item[0].hui_fid));
- hur->hur_user_item[0].hui_extent.offset = offset;
- hur->hur_user_item[0].hui_extent.length = length;
- hur->hur_request.hr_itemcount = 1;
- rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
- len, hur, NULL);
- kfree(hur);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
deleted file mode 100644
index 3075358f3f08..000000000000
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * glimpse code shared between vvp and liblustre (and other Lustre clients in
- * the future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Oleg Drokin <oleg.drokin@sun.com>
- */
-
-#include <linux/libcfs/libcfs.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include <obd.h>
-
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-
-#include <cl_object.h>
-#include "llite_internal.h"
-
-static const struct cl_lock_descr whole_file = {
- .cld_start = 0,
- .cld_end = CL_PAGE_EOF,
- .cld_mode = CLM_READ
-};
-
-/*
- * Check whether file has possible unwriten pages.
- *
- * \retval 1 file is mmap-ed or has dirty pages
- * 0 otherwise
- */
-blkcnt_t dirty_cnt(struct inode *inode)
-{
- blkcnt_t cnt = 0;
- struct vvp_object *vob = cl_inode2vvp(inode);
- void *results[1];
-
- if (inode->i_mapping)
- cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
- results, 0, 1,
- PAGECACHE_TAG_DIRTY);
- if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
- cnt = 1;
-
- return (cnt > 0) ? 1 : 0;
-}
-
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl)
-{
- const struct lu_fid *fid = lu_object_fid(&clob->co_lu);
- struct cl_lock *lock = vvp_env_lock(env);
- struct cl_lock_descr *descr = &lock->cll_descr;
- int result = 0;
-
- CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid));
-
- /* NOTE: this looks like DLM lock request, but it may
- * not be one. Due to CEF_ASYNC flag (translated
- * to LDLM_FL_HAS_INTENT by osc), this is
- * glimpse request, that won't revoke any
- * conflicting DLM locks held. Instead,
- * ll_glimpse_callback() will be called on each
- * client holding a DLM lock against this file,
- * and resulting size will be returned for each
- * stripe. DLM lock on [0, EOF] is acquired only
- * if there were no conflicting locks. If there
- * were conflicting locks, enqueuing or waiting
- * fails with -ENAVAIL, but valid inode
- * attributes are returned anyway.
- */
- *descr = whole_file;
- descr->cld_obj = clob;
- descr->cld_mode = CLM_READ;
- descr->cld_enq_flags = CEF_ASYNC | CEF_MUST;
- if (agl)
- descr->cld_enq_flags |= CEF_AGL;
- /*
- * CEF_ASYNC is used because glimpse sub-locks cannot
- * deadlock (because they never conflict with other
- * locks) and, hence, can be enqueued out-of-order.
- *
- * CEF_MUST protects glimpse lock from conversion into
- * a lockless mode.
- */
- result = cl_lock_request(env, io, lock);
- if (result < 0)
- return result;
-
- if (!agl) {
- ll_merge_attr(env, inode);
- if (i_size_read(inode) > 0 && !inode->i_blocks) {
- /*
- * LU-417: Add dirty pages block count
- * lest i_blocks reports 0, some "cp" or
- * "tar" may think it's a completely
- * sparse file and skip it.
- */
- inode->i_blocks = dirty_cnt(inode);
- }
- }
-
- cl_lock_release(env, lock);
-
- return result;
-}
-
-static int cl_io_get(struct inode *inode, struct lu_env **envout,
- struct cl_io **ioout, u16 *refcheck)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- int result;
-
- if (S_ISREG(inode->i_mode)) {
- env = cl_env_get(refcheck);
- if (!IS_ERR(env)) {
- io = vvp_env_thread_io(env);
- io->ci_obj = clob;
- *envout = env;
- *ioout = io;
- result = 1;
- } else {
- result = PTR_ERR(env);
- }
- } else {
- result = 0;
- }
- return result;
-}
-
-int cl_glimpse_size0(struct inode *inode, int agl)
-{
- /*
- * We don't need ast_flags argument to cl_glimpse_size(), because
- * osc_lock_enqueue() takes care of the possible deadlock that said
- * argument was introduced to avoid.
- */
- /*
- * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
- * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
- * blocking anyway.
- */
- struct lu_env *env = NULL;
- struct cl_io *io = NULL;
- int result;
- u16 refcheck;
-
- result = cl_io_get(inode, &env, &io, &refcheck);
- if (result > 0) {
-again:
- io->ci_verify_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (result > 0)
- /*
- * nothing to do for this io. This currently happens
- * when stripe sub-object's are not yet created.
- */
- result = io->ci_result;
- else if (result == 0)
- result = cl_glimpse_lock(env, io, inode, io->ci_obj,
- agl);
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
- cl_env_put(env, &refcheck);
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
deleted file mode 100644
index df5c0c0ae703..000000000000
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ /dev/null
@@ -1,293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/libcfs/libcfs.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/rbtree.h>
-
-#include <obd.h>
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_dlm.h>
-#include <lustre_mdc.h>
-#include <cl_object.h>
-
-#include "llite_internal.h"
-
-/*
- * ccc_ prefix stands for "Common Client Code".
- */
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-/**
- * An `emergency' environment used by cl_inode_fini() when cl_env_get()
- * fails. Access to this environment is serialized by cl_inode_fini_guard
- * mutex.
- */
-struct lu_env *cl_inode_fini_env;
-u16 cl_inode_fini_refcheck;
-
-/**
- * A mutex serializing calls to slp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-static DEFINE_MUTEX(cl_inode_fini_guard);
-
-int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
- unsigned int attr_flags)
-{
- struct lu_env *env;
- struct cl_io *io;
- int result;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
- io->ci_verify_layout = 1;
-
- io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
- io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
- io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
- io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
- io->u.ci_setattr.sa_attr_flags = attr_flags;
- io->u.ci_setattr.sa_valid = attr->ia_valid;
- io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu);
-
-again:
- if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
- struct vvp_io *vio = vvp_env_io(env);
-
- if (attr->ia_valid & ATTR_FILE)
- /* populate the file descriptor for ftruncate to honor
- * group lock - see LU-787
- */
- vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);
-
- result = cl_io_loop(env, io);
- } else {
- result = io->ci_result;
- }
- cl_io_fini(env, io);
- if (unlikely(io->ci_need_restart))
- goto again;
-
- cl_env_put(env, &refcheck);
- return result;
-}
-
-/**
- * Initialize or update CLIO structures for regular files when new
- * meta-data arrives from the server.
- *
- * \param inode regular file inode
- * \param md new file metadata from MDS
- * - allocates cl_object if necessary,
- * - updated layout, if object was already here.
- */
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
-{
- struct lu_env *env;
- struct ll_inode_info *lli;
- struct cl_object *clob;
- struct lu_site *site;
- struct lu_fid *fid;
- struct cl_object_conf conf = {
- .coc_inode = inode,
- .u = {
- .coc_layout = md->layout,
- }
- };
- int result = 0;
- u16 refcheck;
-
- LASSERT(md->body->mbo_valid & OBD_MD_FLID);
- LASSERT(S_ISREG(inode->i_mode));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- site = ll_i2sbi(inode)->ll_site;
- lli = ll_i2info(inode);
- fid = &lli->lli_fid;
- LASSERT(fid_is_sane(fid));
-
- if (!lli->lli_clob) {
- /* clob is slave of inode, empty lli_clob means for new inode,
- * there is no clob in cache with the given fid, so it is
- * unnecessary to perform lookup-alloc-lookup-insert, just
- * alloc and insert directly.
- */
- LASSERT(inode->i_state & I_NEW);
- conf.coc_lu.loc_flags = LOC_F_NEW;
- clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
- fid, &conf);
- if (!IS_ERR(clob)) {
- /*
- * No locking is necessary, as new inode is
- * locked by I_NEW bit.
- */
- lli->lli_clob = clob;
- lu_object_ref_add(&clob->co_lu, "inode", inode);
- } else {
- result = PTR_ERR(clob);
- }
- } else {
- result = cl_conf_set(env, lli->lli_clob, &conf);
- }
-
- cl_env_put(env, &refcheck);
-
- if (result != 0)
- CERROR("Failure to initialize cl object " DFID ": %d\n",
- PFID(fid), result);
- return result;
-}
-
-/**
- * Wait for others drop their references of the object at first, then we drop
- * the last one, which will lead to the object be destroyed immediately.
- * Must be called after cl_object_kill() against this object.
- *
- * The reason we want to do this is: destroying top object will wait for sub
- * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs)
- * to initiate top object destroying which may deadlock. See bz22520.
- */
-static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
-{
- struct lu_object_header *header = obj->co_lu.lo_header;
- wait_queue_entry_t waiter;
-
- if (unlikely(atomic_read(&header->loh_ref) != 1)) {
- struct lu_site *site = obj->co_lu.lo_dev->ld_site;
- struct lu_site_bkt_data *bkt;
-
- bkt = lu_site_bkt_from_fid(site, &header->loh_fid);
-
- init_waitqueue_entry(&waiter, current);
- add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
-
- while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&header->loh_ref) == 1)
- break;
- schedule();
- }
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
- }
-
- cl_object_put(env, obj);
-}
-
-void cl_inode_fini(struct inode *inode)
-{
- struct lu_env *env;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *clob = lli->lli_clob;
- u16 refcheck;
- int emergency;
-
- if (clob) {
- env = cl_env_get(&refcheck);
- emergency = IS_ERR(env);
- if (emergency) {
- mutex_lock(&cl_inode_fini_guard);
- LASSERT(cl_inode_fini_env);
- env = cl_inode_fini_env;
- }
- /*
- * cl_object cache is a slave to inode cache (which, in turn
- * is a slave to dentry cache), don't keep cl_object in memory
- * when its master is evicted.
- */
- cl_object_kill(env, clob);
- lu_object_ref_del(&clob->co_lu, "inode", inode);
- cl_object_put_last(env, clob);
- lli->lli_clob = NULL;
- if (emergency)
- mutex_unlock(&cl_inode_fini_guard);
- else
- cl_env_put(env, &refcheck);
- }
-}
-
-/**
- * build inode number from passed @fid
- */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-{
- if (BITS_PER_LONG == 32 || api32)
- return fid_flatten32(fid);
- else
- return fid_flatten(fid);
-}
-
-/**
- * build inode generation from passed @fid. If our FID overflows the 32-bit
- * inode number then return a non-zero generation to distinguish them.
- */
-__u32 cl_fid_build_gen(const struct lu_fid *fid)
-{
- __u32 gen;
-
- if (fid_is_igif(fid)) {
- gen = lu_igif_gen(fid);
- return gen;
- }
-
- gen = fid_flatten(fid) >> 32;
- return gen;
-}
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
deleted file mode 100644
index a246b955306e..000000000000
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ /dev/null
@@ -1,186 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl code shared between vvp and liblustre (and other Lustre clients in the
- * future).
- *
- */
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <obd_class.h>
-#include <obd_support.h>
-#include <obd.h>
-#include <cl_object.h>
-
-#include "llite_internal.h"
-
-/* Initialize the default and maximum LOV EA and cookie sizes. This allows
- * us to make MDS RPCs with large enough reply buffers to hold the
- * maximum-sized (= maximum striped) EA and cookie without having to
- * calculate this (via a call into the LOV + OSCs) each time we make an RPC.
- */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
-{
- u32 val_size, max_easize, def_easize;
- int rc;
-
- val_size = sizeof(max_easize);
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE,
- &val_size, &max_easize);
- if (rc)
- return rc;
-
- val_size = sizeof(def_easize);
- rc = obd_get_info(NULL, dt_exp, sizeof(KEY_DEFAULT_EASIZE),
- KEY_DEFAULT_EASIZE, &val_size, &def_easize);
- if (rc)
- return rc;
-
- /*
- * default cookiesize is 0 because from 2.4 server doesn't send
- * llog cookies to client.
- */
- CDEBUG(D_HA, "updating def/max_easize: %d/%d\n",
- def_easize, max_easize);
-
- rc = md_init_ea_size(md_exp, max_easize, def_easize);
- return rc;
-}
-
-/**
- * This function is used as an upcall-callback hooked by liblustre and llite
- * clients into obd_notify() listeners chain to handle notifications about
- * change of import connect_flags. See llu_fsswop_mount() and
- * lustre_common_fill_super().
- */
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data)
-{
- struct lustre_client_ocd *lco;
- struct client_obd *cli;
- __u64 flags;
- int result;
-
- if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) &&
- watched->obd_set_up && !watched->obd_stopping) {
- cli = &watched->u.cli;
- lco = owner;
- flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
- CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n",
- lco->lco_flags, flags);
- mutex_lock(&lco->lco_lock);
- lco->lco_flags &= flags;
- /* for each osc event update ea size */
- if (lco->lco_dt_exp)
- cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp);
-
- mutex_unlock(&lco->lco_lock);
- result = 0;
- } else {
- CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n",
- watched->obd_type->typ_name,
- watched->obd_name, watched->obd_set_up,
- watched->obd_stopping);
- result = -EINVAL;
- }
- return result;
-}
-
-#define GROUPLOCK_SCOPE "grouplock"
-
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ll_grouplock *cg)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct cl_lock *lock;
- struct cl_lock_descr *descr;
- __u32 enqflags;
- u16 refcheck;
- int rc;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = obj;
-
- rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
- if (rc != 0) {
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- /* Does not make sense to take GL for released layout */
- if (rc > 0)
- rc = -ENOTSUPP;
- return rc;
- }
-
- lock = vvp_env_lock(env);
- descr = &lock->cll_descr;
- descr->cld_obj = obj;
- descr->cld_start = 0;
- descr->cld_end = CL_PAGE_EOF;
- descr->cld_gid = gid;
- descr->cld_mode = CLM_GROUP;
-
- enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
- descr->cld_enq_flags = enqflags;
-
- rc = cl_lock_request(env, io, lock);
- if (rc < 0) {
- cl_io_fini(env, io);
- cl_env_put(env, &refcheck);
- return rc;
- }
-
- cg->lg_env = env;
- cg->lg_io = io;
- cg->lg_lock = lock;
- cg->lg_gid = gid;
-
- return 0;
-}
-
-void cl_put_grouplock(struct ll_grouplock *cg)
-{
- struct lu_env *env = cg->lg_env;
- struct cl_io *io = cg->lg_io;
- struct cl_lock *lock = cg->lg_lock;
-
- LASSERT(cg->lg_env);
- LASSERT(cg->lg_gid);
-
- cl_lock_release(env, lock);
- cl_io_fini(env, io);
- cl_env_put(env, NULL);
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
deleted file mode 100644
index d46bcf71b273..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ /dev/null
@@ -1,1337 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef LLITE_INTERNAL_H
-#define LLITE_INTERNAL_H
-#include <lustre_debug.h>
-#include <uapi/linux/lustre/lustre_ver.h>
-#include <lustre_disk.h> /* for s2sbi */
-#include <lustre_linkea.h>
-
-/* for struct cl_lock_descr and struct cl_io */
-#include <lustre_patchless_compat.h>
-#include <lustre_compat.h>
-#include <cl_object.h>
-#include <lustre_lmv.h>
-#include <lustre_mdc.h>
-#include <lustre_intent.h>
-#include <linux/compat.h>
-#include <linux/namei.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-#include "vvp_internal.h"
-#include "range_lock.h"
-
-#ifndef FMODE_EXEC
-#define FMODE_EXEC 0
-#endif
-
-#ifndef VM_FAULT_RETRY
-#define VM_FAULT_RETRY 0
-#endif
-
-/** Only used on client-side for indicating the tail of dir hash/offset. */
-#define LL_DIR_END_OFF 0x7fffffffffffffffULL
-#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
-
-/* 4UL * 1024 * 1024 */
-#define LL_MAX_BLKSIZE_BITS 22
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-
-struct ll_dentry_data {
- struct lookup_intent *lld_it;
- unsigned int lld_sa_generation;
- unsigned int lld_invalid:1;
- unsigned int lld_nfs_dentry:1;
- struct rcu_head lld_rcu_head;
-};
-
-#define ll_d2d(de) ((struct ll_dentry_data *)((de)->d_fsdata))
-
-#define LLI_INODE_MAGIC 0x111d0de5
-#define LLI_INODE_DEAD 0xdeadd00d
-
-struct ll_getname_data {
- struct dir_context ctx;
- char *lgd_name; /* points to buffer with NAME_MAX+1 size */
- struct lu_fid lgd_fid; /* target fid we are looking for */
- int lgd_found; /* inode matched? */
-};
-
-struct ll_grouplock {
- struct lu_env *lg_env;
- struct cl_io *lg_io;
- struct cl_lock *lg_lock;
- unsigned long lg_gid;
-};
-
-enum ll_file_flags {
- /* File data is modified. */
- LLIF_DATA_MODIFIED = 0,
- /* File is being restored */
- LLIF_FILE_RESTORING = 1,
- /* Xattr cache is attached to the file */
- LLIF_XATTR_CACHE = 2,
-};
-
-struct ll_inode_info {
- __u32 lli_inode_magic;
-
- spinlock_t lli_lock;
- unsigned long lli_flags;
- struct posix_acl *lli_posix_acl;
-
- /* identifying fields for both metadata and data stacks. */
- struct lu_fid lli_fid;
- /* master inode fid for stripe directory */
- struct lu_fid lli_pfid;
-
- /* We need all three because every inode may be opened in different
- * modes
- */
- struct obd_client_handle *lli_mds_read_och;
- struct obd_client_handle *lli_mds_write_och;
- struct obd_client_handle *lli_mds_exec_och;
- __u64 lli_open_fd_read_count;
- __u64 lli_open_fd_write_count;
- __u64 lli_open_fd_exec_count;
- /* Protects access to och pointers and their usage counters */
- struct mutex lli_och_mutex;
-
- struct inode lli_vfs_inode;
-
- /* the most recent timestamps obtained from mds */
- s64 lli_atime;
- s64 lli_mtime;
- s64 lli_ctime;
- spinlock_t lli_agl_lock;
-
- /* Try to make the d::member and f::member are aligned. Before using
- * these members, make clear whether it is directory or not.
- */
- union {
- /* for directory */
- struct {
- /* serialize normal readdir and statahead-readdir. */
- struct mutex lli_readdir_mutex;
-
- /* metadata statahead */
- /* since parent-child threads can share the same @file
- * struct, "opendir_key" is the token when dir close for
- * case of parent exit before child -- it is me should
- * cleanup the dir readahead.
- */
- void *lli_opendir_key;
- struct ll_statahead_info *lli_sai;
- /* protect statahead stuff. */
- spinlock_t lli_sa_lock;
- /* "opendir_pid" is the token when lookup/revalidate
- * -- I am the owner of dir statahead.
- */
- pid_t lli_opendir_pid;
- /* stat will try to access statahead entries or start
- * statahead if this flag is set, and this flag will be
- * set upon dir open, and cleared when dir is closed,
- * statahead hit ratio is too low, or start statahead
- * thread failed.
- */
- unsigned int lli_sa_enabled:1;
- /* generation for statahead */
- unsigned int lli_sa_generation;
- /* directory stripe information */
- struct lmv_stripe_md *lli_lsm_md;
- /* default directory stripe offset. This is extracted
- * from the "dmv" xattr in order to decide which MDT to
- * create a subdirectory on. The MDS itself fetches
- * "dmv" and gets the rest of the default layout itself
- * (count, hash, etc).
- */
- __u32 lli_def_stripe_offset;
- };
-
- /* for non-directory */
- struct {
- struct mutex lli_size_mutex;
- char *lli_symlink_name;
- /*
- * struct rw_semaphore {
- * signed long count; // align d.d_def_acl
- * spinlock_t wait_lock; // align d.d_sa_lock
- * struct list_head wait_list;
- * }
- */
- struct rw_semaphore lli_trunc_sem;
- struct range_lock_tree lli_write_tree;
-
- struct rw_semaphore lli_glimpse_sem;
- unsigned long lli_glimpse_time;
- struct list_head lli_agl_list;
- __u64 lli_agl_index;
-
- /* for writepage() only to communicate to fsync */
- int lli_async_rc;
-
- /*
- * whenever a process try to read/write the file, the
- * jobid of the process will be saved here, and it'll
- * be packed into the write PRC when flush later.
- *
- * so the read/write statistics for jobid will not be
- * accurate if the file is shared by different jobs.
- */
- char lli_jobid[LUSTRE_JOBID_SIZE];
- };
- };
-
- /* XXX: For following frequent used members, although they maybe special
- * used for non-directory object, it is some time-wasting to check
- * whether the object is directory or not before using them. On the
- * other hand, currently, sizeof(f) > sizeof(d), it cannot reduce
- * the "ll_inode_info" size even if moving those members into u.f.
- * So keep them out side.
- *
- * In the future, if more members are added only for directory,
- * some of the following members can be moved into u.f.
- */
- struct cl_object *lli_clob;
-
- /* mutex to request for layout lock exclusively. */
- struct mutex lli_layout_mutex;
- /* Layout version, protected by lli_layout_lock */
- __u32 lli_layout_gen;
- spinlock_t lli_layout_lock;
-
- struct rw_semaphore lli_xattrs_list_rwsem;
- struct mutex lli_xattrs_enq_lock;
- struct list_head lli_xattrs;/* ll_xattr_entry->xe_list */
-};
-
-static inline __u32 ll_layout_version_get(struct ll_inode_info *lli)
-{
- __u32 gen;
-
- spin_lock(&lli->lli_layout_lock);
- gen = lli->lli_layout_gen;
- spin_unlock(&lli->lli_layout_lock);
-
- return gen;
-}
-
-static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen)
-{
- spin_lock(&lli->lli_layout_lock);
- lli->lli_layout_gen = gen;
- spin_unlock(&lli->lli_layout_lock);
-}
-
-int ll_xattr_cache_destroy(struct inode *inode);
-
-int ll_xattr_cache_get(struct inode *inode, const char *name,
- char *buffer, size_t size, __u64 valid);
-
-int ll_init_security(struct dentry *dentry, struct inode *inode,
- struct inode *dir);
-
-/*
- * Locking to guarantee consistency of non-atomic updates to long long i_size,
- * consistency between file size and KMS.
- *
- * Implemented by ->lli_size_mutex and ->lsm_lock, nested in that order.
- */
-
-void ll_inode_size_lock(struct inode *inode);
-void ll_inode_size_unlock(struct inode *inode);
-
-/* FIXME: replace the name of this with LL_I to conform to kernel stuff */
-/* static inline struct ll_inode_info *LL_I(struct inode *inode) */
-static inline struct ll_inode_info *ll_i2info(struct inode *inode)
-{
- return container_of(inode, struct ll_inode_info, lli_vfs_inode);
-}
-
-/* default to about 64M of readahead on a given system. */
-#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT))
-
-/* default to read-ahead full files smaller than 2MB on the second read */
-#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT))
-
-enum ra_stat {
- RA_STAT_HIT = 0,
- RA_STAT_MISS,
- RA_STAT_DISTANT_READPAGE,
- RA_STAT_MISS_IN_WINDOW,
- RA_STAT_FAILED_GRAB_PAGE,
- RA_STAT_FAILED_MATCH,
- RA_STAT_DISCARDED,
- RA_STAT_ZERO_LEN,
- RA_STAT_ZERO_WINDOW,
- RA_STAT_EOF,
- RA_STAT_MAX_IN_FLIGHT,
- RA_STAT_WRONG_GRAB_PAGE,
- RA_STAT_FAILED_REACH_END,
- _NR_RA_STAT,
-};
-
-struct ll_ra_info {
- atomic_t ra_cur_pages;
- unsigned long ra_max_pages;
- unsigned long ra_max_pages_per_file;
- unsigned long ra_max_read_ahead_whole_pages;
-};
-
-/* ra_io_arg will be filled in the beginning of ll_readahead with
- * ras_lock, then the following ll_read_ahead_pages will read RA
- * pages according to this arg, all the items in this structure are
- * counted by page index.
- */
-struct ra_io_arg {
- unsigned long ria_start; /* start offset of read-ahead*/
- unsigned long ria_end; /* end offset of read-ahead*/
- unsigned long ria_reserved; /* reserved pages for read-ahead */
- unsigned long ria_end_min; /* minimum end to cover current read */
- bool ria_eof; /* reach end of file */
- /* If stride read pattern is detected, ria_stoff means where
- * stride read is started. Note: for normal read-ahead, the
- * value here is meaningless, and also it will not be accessed
- */
- pgoff_t ria_stoff;
- /* ria_length and ria_pages are the length and pages length in the
- * stride I/O mode. And they will also be used to check whether
- * it is stride I/O read-ahead in the read-ahead pages
- */
- unsigned long ria_length;
- unsigned long ria_pages;
-};
-
-/* LL_HIST_MAX=32 causes an overflow */
-#define LL_HIST_MAX 28
-#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
-#define LL_PROCESS_HIST_MAX 10
-struct per_process_info {
- pid_t pid;
- struct obd_histogram pp_r_hist;
- struct obd_histogram pp_w_hist;
-};
-
-/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
-struct ll_rw_extents_info {
- struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
-};
-
-#define LL_OFFSET_HIST_MAX 100
-struct ll_rw_process_info {
- pid_t rw_pid;
- int rw_op;
- loff_t rw_range_start;
- loff_t rw_range_end;
- loff_t rw_last_file_pos;
- loff_t rw_offset;
- size_t rw_smallest_extent;
- size_t rw_largest_extent;
- struct ll_file_data *rw_last_file;
-};
-
-enum stats_track_type {
- STATS_TRACK_ALL = 0, /* track all processes */
- STATS_TRACK_PID, /* track process with this pid */
- STATS_TRACK_PPID, /* track processes with this ppid */
- STATS_TRACK_GID, /* track processes with this gid */
- STATS_TRACK_LAST,
-};
-
-/* flags for sbi->ll_flags */
-#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
-#define LL_SBI_FLOCK 0x04
-#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-#define LL_SBI_ACL 0x10 /* support ACL */
-/* LL_SBI_RMT_CLIENT 0x40 remote client */
-#define LL_SBI_MDS_CAPA 0x80 /* support mds capa, obsolete */
-#define LL_SBI_OSS_CAPA 0x100 /* support oss capa, obsolete */
-#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
-#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
-#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
-/* LL_SBI_SOM_PREVIEW 0x1000 SOM preview mount option, obsolete */
-#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
-#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
-#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
-#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
-#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
-#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
-#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */
-#define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */
-#define LL_SBI_ALWAYS_PING 0x200000 /* always ping even if server
- * suppress_pings
- */
-
-#define LL_SBI_FLAGS { \
- "nolck", \
- "checksum", \
- "flock", \
- "user_xattr", \
- "acl", \
- "???", \
- "???", \
- "mds_capa", \
- "oss_capa", \
- "flock", \
- "lru_resize", \
- "lazy_statfs", \
- "som", \
- "32bit_api", \
- "64bit_hash", \
- "agl", \
- "verbose", \
- "layout", \
- "user_fid2path",\
- "xattr_cache", \
- "norootsquash", \
- "always_ping", \
-}
-
-/*
- * This is embedded into llite super-blocks to keep track of connect
- * flags (capabilities) supported by all imports given mount is
- * connected to.
- */
-struct lustre_client_ocd {
- /*
- * This is conjunction of connect_flags across all imports
- * (LOVs) this mount is connected to. This field is updated by
- * cl_ocd_update() under ->lco_lock.
- */
- __u64 lco_flags;
- struct mutex lco_lock;
- struct obd_export *lco_md_exp;
- struct obd_export *lco_dt_exp;
-};
-
-struct ll_sb_info {
- /* this protects pglist and ra_info. It isn't safe to
- * grab from interrupt contexts
- */
- spinlock_t ll_lock;
- spinlock_t ll_pp_extent_lock; /* pp_extent entry*/
- spinlock_t ll_process_lock; /* ll_rw_process_info */
- struct obd_uuid ll_sb_uuid;
- struct obd_export *ll_md_exp;
- struct obd_export *ll_dt_exp;
- struct dentry *ll_debugfs_entry;
- struct lu_fid ll_root_fid; /* root object fid */
-
- int ll_flags;
- unsigned int ll_umounting:1,
- ll_xattr_cache_enabled:1,
- ll_client_common_fill_super_succeeded:1;
-
- struct lustre_client_ocd ll_lco;
-
- struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
-
- /*
- * Used to track "unstable" pages on a client, and maintain a
- * LRU list of clean pages. An "unstable" page is defined as
- * any page which is sent to a server as part of a bulk request,
- * but is uncommitted to stable storage.
- */
- struct cl_client_cache *ll_cache;
-
- struct lprocfs_stats *ll_ra_stats;
-
- struct ll_ra_info ll_ra_info;
- unsigned int ll_namelen;
- const struct file_operations *ll_fop;
-
- unsigned int ll_md_brw_pages; /* readdir pages per RPC */
-
- struct lu_site *ll_site;
- struct cl_device *ll_cl;
- /* Statistics */
- struct ll_rw_extents_info ll_rw_extents_info;
- int ll_extent_process_count;
- struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
- unsigned int ll_offset_process_count;
- struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
- unsigned int ll_rw_offset_entry_count;
- int ll_stats_track_id;
- enum stats_track_type ll_stats_track_type;
- int ll_rw_stats_on;
-
- /* metadata stat-ahead */
- unsigned int ll_sa_max; /* max statahead RPCs */
- atomic_t ll_sa_total; /* statahead thread started
- * count
- */
- atomic_t ll_sa_wrong; /* statahead thread stopped for
- * low hit ratio
- */
- atomic_t ll_sa_running; /* running statahead thread
- * count
- */
- atomic_t ll_agl_total; /* AGL thread started count */
-
- dev_t ll_sdev_orig; /* save s_dev before assign for
- * clustered nfs
- */
- /* root squash */
- struct root_squash_info ll_squash;
- struct path ll_mnt;
-
- __kernel_fsid_t ll_fsid;
- struct kobject ll_kobj; /* sysfs object */
- struct super_block *ll_sb; /* struct super_block (for sysfs code)*/
- struct completion ll_kobj_unregister;
-};
-
-/*
- * per file-descriptor read-ahead data.
- */
-struct ll_readahead_state {
- spinlock_t ras_lock;
- /*
- * index of the last page that read(2) needed and that wasn't in the
- * cache. Used by ras_update() to detect seeks.
- *
- * XXX nikita: if access seeks into cached region, Lustre doesn't see
- * this.
- */
- unsigned long ras_last_readpage;
- /*
- * number of pages read after last read-ahead window reset. As window
- * is reset on each seek, this is effectively a number of consecutive
- * accesses. Maybe ->ras_accessed_in_window is better name.
- *
- * XXX nikita: window is also reset (by ras_update()) when Lustre
- * believes that memory pressure evicts read-ahead pages. In that
- * case, it probably doesn't make sense to expand window to
- * PTLRPC_MAX_BRW_PAGES on the third access.
- */
- unsigned long ras_consecutive_pages;
- /*
- * number of read requests after the last read-ahead window reset
- * As window is reset on each seek, this is effectively the number
- * on consecutive read request and is used to trigger read-ahead.
- */
- unsigned long ras_consecutive_requests;
- /*
- * Parameters of current read-ahead window. Handled by
- * ras_update(). On the initial access to the file or after a seek,
- * window is reset to 0. After 3 consecutive accesses, window is
- * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
- * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
- */
- unsigned long ras_window_start, ras_window_len;
- /*
- * Optimal RPC size. It decides how many pages will be sent
- * for each read-ahead.
- */
- unsigned long ras_rpc_size;
- /*
- * Where next read-ahead should start at. This lies within read-ahead
- * window. Read-ahead window is read in pieces rather than at once
- * because: 1. lustre limits total number of pages under read-ahead by
- * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
- * not covered by DLM lock.
- */
- unsigned long ras_next_readahead;
- /*
- * Total number of ll_file_read requests issued, reads originating
- * due to mmap are not counted in this total. This value is used to
- * trigger full file read-ahead after multiple reads to a small file.
- */
- unsigned long ras_requests;
- /*
- * Page index with respect to the current request, these value
- * will not be accurate when dealing with reads issued via mmap.
- */
- unsigned long ras_request_index;
- /*
- * The following 3 items are used for detecting the stride I/O
- * mode.
- * In stride I/O mode,
- * ...............|-----data-----|****gap*****|--------|******|....
- * offset |-stride_pages-|-stride_gap-|
- * ras_stride_offset = offset;
- * ras_stride_length = stride_pages + stride_gap;
- * ras_stride_pages = stride_pages;
- * Note: all these three items are counted by pages.
- */
- unsigned long ras_stride_length;
- unsigned long ras_stride_pages;
- pgoff_t ras_stride_offset;
- /*
- * number of consecutive stride request count, and it is similar as
- * ras_consecutive_requests, but used for stride I/O mode.
- * Note: only more than 2 consecutive stride request are detected,
- * stride read-ahead will be enable
- */
- unsigned long ras_consecutive_stride_requests;
-};
-
-extern struct kmem_cache *ll_file_data_slab;
-struct lustre_handle;
-struct ll_file_data {
- struct ll_readahead_state fd_ras;
- struct ll_grouplock fd_grouplock;
- __u64 lfd_pos;
- __u32 fd_flags;
- fmode_t fd_omode;
- /* openhandle if lease exists for this file.
- * Borrow lli->lli_och_mutex to protect assignment
- */
- struct obd_client_handle *fd_lease_och;
- struct obd_client_handle *fd_och;
- struct file *fd_file;
- /* Indicate whether need to report failure when close.
- * true: failure is known, not report again.
- * false: unknown failure, should report.
- */
- bool fd_write_failed;
- rwlock_t fd_lock; /* protect lcc list */
- struct list_head fd_lccs; /* list of ll_cl_context */
-};
-
-extern struct dentry *llite_root;
-extern struct kset *llite_kset;
-
-static inline struct inode *ll_info2i(struct ll_inode_info *lli)
-{
- return &lli->lli_vfs_inode;
-}
-
-__u32 ll_i2suppgid(struct inode *i);
-void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2);
-
-static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
-{
-#if BITS_PER_LONG == 32
- return 1;
-#elif defined(CONFIG_COMPAT)
- return unlikely(in_compat_syscall() ||
- (sbi->ll_flags & LL_SBI_32BIT_API));
-#else
- return unlikely(sbi->ll_flags & LL_SBI_32BIT_API);
-#endif
-}
-
-void ll_ras_enter(struct file *f);
-
-/* llite/lcommon_misc.c */
-int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp);
-int cl_ocd_update(struct obd_device *host,
- struct obd_device *watched,
- enum obd_notify_event ev, void *owner, void *data);
-int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
- struct ll_grouplock *cg);
-void cl_put_grouplock(struct ll_grouplock *cg);
-
-/* llite/lproc_llite.c */
-int ldebugfs_register_mountpoint(struct dentry *parent,
- struct super_block *sb, char *osc, char *mdc);
-void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi);
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
-void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw);
-
-enum {
- LPROC_LL_DIRTY_HITS,
- LPROC_LL_DIRTY_MISSES,
- LPROC_LL_READ_BYTES,
- LPROC_LL_WRITE_BYTES,
- LPROC_LL_BRW_READ,
- LPROC_LL_BRW_WRITE,
- LPROC_LL_IOCTL,
- LPROC_LL_OPEN,
- LPROC_LL_RELEASE,
- LPROC_LL_MAP,
- LPROC_LL_LLSEEK,
- LPROC_LL_FSYNC,
- LPROC_LL_READDIR,
- LPROC_LL_SETATTR,
- LPROC_LL_TRUNC,
- LPROC_LL_FLOCK,
- LPROC_LL_GETATTR,
- LPROC_LL_CREATE,
- LPROC_LL_LINK,
- LPROC_LL_UNLINK,
- LPROC_LL_SYMLINK,
- LPROC_LL_MKDIR,
- LPROC_LL_RMDIR,
- LPROC_LL_MKNOD,
- LPROC_LL_RENAME,
- LPROC_LL_STAFS,
- LPROC_LL_ALLOC_INODE,
- LPROC_LL_SETXATTR,
- LPROC_LL_GETXATTR,
- LPROC_LL_GETXATTR_HITS,
- LPROC_LL_LISTXATTR,
- LPROC_LL_REMOVEXATTR,
- LPROC_LL_INODE_PERM,
- LPROC_LL_FILE_OPCODES
-};
-
-/* llite/dir.c */
-extern const struct file_operations ll_dir_operations;
-extern const struct inode_operations ll_dir_inode_operations;
-int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
- struct dir_context *ctx);
-int ll_get_mdt_idx(struct inode *inode);
-int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid);
-struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
- __u64 offset);
-void ll_release_page(struct inode *inode, struct page *page, bool remove);
-
-/* llite/namei.c */
-extern const struct inode_operations ll_special_inode_operations;
-
-struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct lustre_md *lic);
-int ll_test_inode_by_fid(struct inode *inode, void *opaque);
-int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag);
-struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
-void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
-
-/* llite/rw.c */
-int ll_writepage(struct page *page, struct writeback_control *wbc);
-int ll_writepages(struct address_space *mapping, struct writeback_control *wbc);
-int ll_readpage(struct file *file, struct page *page);
-void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
-struct ll_cl_context *ll_cl_find(struct file *file);
-void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io);
-void ll_cl_remove(struct file *file, const struct lu_env *env);
-
-extern const struct address_space_operations ll_aops;
-
-/* llite/file.c */
-extern const struct file_operations ll_file_operations;
-extern const struct file_operations ll_file_operations_flock;
-extern const struct file_operations ll_file_operations_noflock;
-extern const struct inode_operations ll_file_inode_operations;
-int ll_have_md_lock(struct inode *inode, __u64 *bits,
- enum ldlm_mode l_req_mode);
-enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh, __u64 flags,
- enum ldlm_mode mode);
-int ll_file_open(struct inode *inode, struct file *file);
-int ll_file_release(struct inode *inode, struct file *file);
-int ll_release_openhandle(struct inode *inode, struct lookup_intent *it);
-int ll_md_real_close(struct inode *inode, fmode_t fmode);
-int ll_getattr(const struct path *path, struct kstat *stat,
- u32 request_mask, unsigned int flags);
-struct posix_acl *ll_get_acl(struct inode *inode, int type);
-int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
- const char *name, int namelen);
-int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid, struct inode **inode);
-int ll_inode_permission(struct inode *inode, int mask);
-
-int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
- __u64 flags, struct lov_user_md *lum,
- int lum_size);
-int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
- struct lov_mds_md **lmm, int *lmm_size,
- struct ptlrpc_request **request);
-int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
- int set_default);
-int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size,
- struct ptlrpc_request **request, u64 valid);
-int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
-int ll_merge_attr(const struct lu_env *env, struct inode *inode);
-int ll_fid2path(struct inode *inode, void __user *arg);
-int ll_data_version(struct inode *inode, __u64 *data_version, int flags);
-int ll_hsm_release(struct inode *inode);
-int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss);
-
-/* llite/dcache.c */
-
-extern const struct dentry_operations ll_d_ops;
-void ll_intent_drop_lock(struct lookup_intent *it);
-void ll_intent_release(struct lookup_intent *it);
-void ll_invalidate_aliases(struct inode *inode);
-void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode);
-int ll_revalidate_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it, struct inode *inode);
-
-/* llite/llite_lib.c */
-extern struct super_operations lustre_super_operations;
-
-void ll_lli_init(struct ll_inode_info *lli);
-int ll_fill_super(struct super_block *sb);
-void ll_put_super(struct super_block *sb);
-void ll_kill_super(struct super_block *sb);
-struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
-void ll_dir_clear_lsm_md(struct inode *inode);
-void ll_clear_inode(struct inode *inode);
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import);
-int ll_setattr(struct dentry *de, struct iattr *attr);
-int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags);
-int ll_update_inode(struct inode *inode, struct lustre_md *md);
-int ll_read_inode2(struct inode *inode, void *opaque);
-void ll_delete_inode(struct inode *inode);
-int ll_iocontrol(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
-int ll_flush_ctx(struct inode *inode);
-void ll_umount_begin(struct super_block *sb);
-int ll_remount_fs(struct super_block *sb, int *flags, char *data);
-int ll_show_options(struct seq_file *seq, struct dentry *dentry);
-void ll_dirty_page_discard_warn(struct page *page, int ioret);
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *sb, struct lookup_intent *it);
-int ll_obd_statfs(struct inode *inode, void __user *arg);
-int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
-int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
-int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize);
-int ll_process_config(struct lustre_cfg *lcfg);
-
-enum {
- LUSTRE_OPC_MKDIR = 0,
- LUSTRE_OPC_SYMLINK = 1,
- LUSTRE_OPC_MKNOD = 2,
- LUSTRE_OPC_CREATE = 3,
- LUSTRE_OPC_ANY = 5,
-};
-
-struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
- struct inode *i1, struct inode *i2,
- const char *name, size_t namelen,
- u32 mode, __u32 opc, void *data);
-void ll_finish_md_op_data(struct md_op_data *op_data);
-int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
-void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
-ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
- struct lov_user_md **kbuf);
-
-/* Compute expected user md size when passing in a md from user space */
-static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
-{
- switch (lum->lmm_magic) {
- case LOV_USER_MAGIC_V1:
- return sizeof(struct lov_user_md_v1);
- case LOV_USER_MAGIC_V3:
- return sizeof(struct lov_user_md_v3);
- case LOV_USER_MAGIC_SPECIFIC:
- if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
- return -EINVAL;
-
- return lov_user_md_size(lum->lmm_stripe_count,
- LOV_USER_MAGIC_SPECIFIC);
- }
- return -EINVAL;
-}
-
-/* llite/llite_nfs.c */
-extern const struct export_operations lustre_export_operations;
-__u32 get_uuid2int(const char *name, int len);
-void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid);
-struct inode *search_inode_for_lustre(struct super_block *sb,
- const struct lu_fid *fid);
-int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid);
-
-/* llite/symlink.c */
-extern const struct inode_operations ll_fast_symlink_inode_operations;
-
-/**
- * IO arguments for various VFS I/O interfaces.
- */
-struct vvp_io_args {
- /** normal/splice */
- union {
- struct {
- struct kiocb *via_iocb;
- struct iov_iter *via_iter;
- } normal;
- } u;
-};
-
-struct ll_cl_context {
- struct list_head lcc_list;
- void *lcc_cookie;
- const struct lu_env *lcc_env;
- struct cl_io *lcc_io;
- struct cl_page *lcc_page;
-};
-
-struct ll_thread_info {
- struct vvp_io_args lti_args;
- struct ra_io_arg lti_ria;
- struct ll_cl_context lti_io_ctx;
-};
-
-extern struct lu_context_key ll_thread_key;
-static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
-{
- struct ll_thread_info *lti;
-
- lti = lu_context_key_get(&env->le_ctx, &ll_thread_key);
- LASSERT(lti);
- return lti;
-}
-
-static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
-{
- return &ll_env_info(env)->lti_args;
-}
-
-/* llite/llite_mmap.c */
-
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
-int ll_file_mmap(struct file *file, struct vm_area_struct *vma);
-void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma,
- unsigned long addr, size_t count);
-struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
- size_t count);
-
-static inline void ll_invalidate_page(struct page *vmpage)
-{
- struct address_space *mapping = vmpage->mapping;
- loff_t offset = vmpage->index << PAGE_SHIFT;
-
- LASSERT(PageLocked(vmpage));
- if (!mapping)
- return;
-
- /*
- * truncate_complete_page() calls
- * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete().
- */
- ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE);
- truncate_complete_page(mapping, vmpage);
-}
-
-#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-
-/* don't need an addref as the sb_info should be holding one */
-static inline struct obd_export *ll_s2dtexp(struct super_block *sb)
-{
- return ll_s2sbi(sb)->ll_dt_exp;
-}
-
-/* don't need an addref as the sb_info should be holding one */
-static inline struct obd_export *ll_s2mdexp(struct super_block *sb)
-{
- return ll_s2sbi(sb)->ll_md_exp;
-}
-
-static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
-{
- struct obd_device *obd = sbi->ll_md_exp->exp_obd;
-
- if (!obd)
- LBUG();
- return &obd->u.cli;
-}
-
-/* FIXME: replace the name of this with LL_SB to conform to kernel stuff */
-static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
-{
- return ll_s2sbi(inode->i_sb);
-}
-
-static inline struct obd_export *ll_i2dtexp(struct inode *inode)
-{
- return ll_s2dtexp(inode->i_sb);
-}
-
-static inline struct obd_export *ll_i2mdexp(struct inode *inode)
-{
- return ll_s2mdexp(inode->i_sb);
-}
-
-static inline struct lu_fid *ll_inode2fid(struct inode *inode)
-{
- struct lu_fid *fid;
-
- LASSERT(inode);
- fid = &ll_i2info(inode)->lli_fid;
-
- return fid;
-}
-
-static inline loff_t ll_file_maxbytes(struct inode *inode)
-{
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
-
- if (!obj)
- return MAX_LFS_FILESIZE;
-
- return min_t(loff_t, cl_object_maxbytes(obj), MAX_LFS_FILESIZE);
-}
-
-/* llite/xattr.c */
-extern const struct xattr_handler *ll_xattr_handlers[];
-
-#define XATTR_USER_T 1
-#define XATTR_TRUSTED_T 2
-#define XATTR_SECURITY_T 3
-#define XATTR_ACL_ACCESS_T 4
-#define XATTR_ACL_DEFAULT_T 5
-#define XATTR_LUSTRE_T 6
-#define XATTR_OTHER_T 7
-
-ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ll_xattr_list(struct inode *inode, const char *name, int type,
- void *buffer, size_t size, __u64 valid);
-const struct xattr_handler *get_xattr_type(const char *name);
-
-/**
- * Common IO arguments for various VFS I/O interfaces.
- */
-int cl_sb_init(struct super_block *sb);
-int cl_sb_fini(struct super_block *sb);
-
-enum ras_update_flags {
- LL_RAS_HIT = 0x1,
- LL_RAS_MMAP = 0x2
-};
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
-void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
-
-/* statahead.c */
-#define LL_SA_RPC_MIN 2
-#define LL_SA_RPC_DEF 32
-#define LL_SA_RPC_MAX 8192
-
-#define LL_SA_CACHE_BIT 5
-#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT)
-#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1)
-
-/* per inode struct, for dir only */
-struct ll_statahead_info {
- struct dentry *sai_dentry;
- atomic_t sai_refcount; /* when access this struct, hold
- * refcount
- */
- unsigned int sai_max; /* max ahead of lookup */
- __u64 sai_sent; /* stat requests sent count */
- __u64 sai_replied; /* stat requests which received
- * reply
- */
- __u64 sai_index; /* index of statahead entry */
- __u64 sai_index_wait; /* index of entry which is the
- * caller is waiting for
- */
- __u64 sai_hit; /* hit count */
- __u64 sai_miss; /* miss count:
- * for "ls -al" case, it includes
- * hidden dentry miss;
- * for "ls -l" case, it does not
- * include hidden dentry miss.
- * "sai_miss_hidden" is used for
- * the later case.
- */
- unsigned int sai_consecutive_miss; /* consecutive miss */
- unsigned int sai_miss_hidden;/* "ls -al", but first dentry
- * is not a hidden one
- */
- unsigned int sai_skip_hidden;/* skipped hidden dentry count */
- unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
- * hidden entries
- */
- sai_agl_valid:1,/* AGL is valid for the dir */
- sai_in_readpage:1;/* statahead in readdir() */
- wait_queue_head_t sai_waitq; /* stat-ahead wait queue */
- struct task_struct *sai_task; /* stat-ahead thread */
- struct task_struct *sai_agl_task; /* AGL thread */
- struct list_head sai_interim_entries; /* entries which got async
- * stat reply, but not
- * instantiated
- */
- struct list_head sai_entries; /* completed entries */
- struct list_head sai_agls; /* AGLs to be sent */
- struct list_head sai_cache[LL_SA_CACHE_SIZE];
- spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
- atomic_t sai_cache_count; /* entry count in cache */
-};
-
-int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug);
-void ll_authorize_statahead(struct inode *dir, void *key);
-void ll_deauthorize_statahead(struct inode *dir, void *key);
-
-blkcnt_t dirty_cnt(struct inode *inode);
-
-int cl_glimpse_size0(struct inode *inode, int agl);
-int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io,
- struct inode *inode, struct cl_object *clob, int agl);
-
-static inline int cl_glimpse_size(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 0);
-}
-
-static inline int cl_agl(struct inode *inode)
-{
- return cl_glimpse_size0(inode, 1);
-}
-
-static inline int ll_glimpse_size(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- down_read(&lli->lli_glimpse_sem);
- rc = cl_glimpse_size(inode);
- lli->lli_glimpse_time = cfs_time_current();
- up_read(&lli->lli_glimpse_sem);
- return rc;
-}
-
-/*
- * dentry may statahead when statahead is enabled and current process has opened
- * parent directory, and this dentry hasn't accessed statahead cache before
- */
-static inline bool
-dentry_may_statahead(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli;
- struct ll_dentry_data *ldd;
-
- if (ll_i2sbi(dir)->ll_sa_max == 0)
- return false;
-
- lli = ll_i2info(dir);
-
- /*
- * statahead is not allowed for this dir, there may be three causes:
- * 1. dir is not opened.
- * 2. statahead hit ratio is too low.
- * 3. previous stat started statahead thread failed.
- */
- if (!lli->lli_sa_enabled)
- return false;
-
- /* not the same process, don't statahead */
- if (lli->lli_opendir_pid != current_pid())
- return false;
-
- /*
- * When stating a dentry, kernel may trigger 'revalidate' or 'lookup'
- * multiple times, eg. for 'getattr', 'getxattr' and etc.
- * For patchless client, lookup intent is not accurate, which may
- * misguide statahead. For example:
- * The 'revalidate' call for 'getattr' and 'getxattr' of a dentry will
- * have the same intent -- IT_GETATTR, while one dentry should access
- * statahead cache once, otherwise statahead windows is messed up.
- * The solution is as following:
- * Assign 'lld_sa_generation' with 'lli_sa_generation' when a dentry
- * IT_GETATTR for the first time, and subsequent IT_GETATTR will
- * bypass interacting with statahead cache by checking
- * 'lld_sa_generation == lli->lli_sa_generation'.
- */
- ldd = ll_d2d(dentry);
- if (ldd->lld_sa_generation == lli->lli_sa_generation)
- return false;
-
- return true;
-}
-
-/* llite ioctl register support routine */
-enum llioc_iter {
- LLIOC_CONT = 0,
- LLIOC_STOP
-};
-
-#define LLIOC_MAX_CMD 256
-
-/*
- * Rules to write a callback function:
- *
- * Parameters:
- * @magic: Dynamic ioctl call routine will feed this value with the pointer
- * returned to ll_iocontrol_register. Callback functions should use this
- * data to check the potential collasion of ioctl cmd. If collasion is
- * found, callback function should return LLIOC_CONT.
- * @rcp: The result of ioctl command.
- *
- * Return values:
- * If @magic matches the pointer returned by ll_iocontrol_data, the
- * callback should return LLIOC_STOP; return LLIOC_STOP otherwise.
- */
-typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
- struct file *file, unsigned int cmd, unsigned long arg,
- void *magic, int *rcp);
-
-/* export functions */
-/* Register ioctl block dynamatically for a regular file.
- *
- * @cmd: the array of ioctl command set
- * @count: number of commands in the @cmd
- * @cb: callback function, it will be called if an ioctl command is found to
- * belong to the command list @cmd.
- *
- * Return value:
- * A magic pointer will be returned if success;
- * otherwise, NULL will be returned.
- */
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
-void ll_iocontrol_unregister(void *magic);
-
-int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
- enum cl_fsync_mode mode, int ignore_layout);
-
-/** direct write pages */
-struct ll_dio_pages {
- /** page array to be written. we don't support
- * partial pages except the last one.
- */
- struct page **ldp_pages;
- /* offset of each page */
- loff_t *ldp_offsets;
- /** if ldp_offsets is NULL, it means a sequential
- * pages to be written, then this is the file offset
- * of the first page.
- */
- loff_t ldp_start_offset;
- /** how many bytes are to be written. */
- size_t ldp_size;
- /** # of pages in the array. */
- int ldp_nr;
-};
-
-ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct ll_dio_pages *pv);
-
-static inline int ll_file_nolock(const struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct inode *inode = file_inode(file);
-
- return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
- (ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
-}
-
-static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
- struct lookup_intent *it, __u64 *bits)
-{
- if (!it->it_lock_set) {
- struct lustre_handle handle;
-
- /* If this inode is a remote object, it will get two
- * separate locks in different namespaces, Master MDT,
- * where the name entry is, will grant LOOKUP lock,
- * remote MDT, where the object is, will grant
- * UPDATE|PERM lock. The inode will be attached to both
- * LOOKUP and PERM locks, so revoking either locks will
- * case the dcache being cleared
- */
- if (it->it_remote_lock_mode) {
- handle.cookie = it->it_remote_lock_handle;
- CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "%p for remote lock %#llx\n",
- PFID(ll_inode2fid(inode)), inode,
- handle.cookie);
- md_set_lock_data(exp, &handle, inode, NULL);
- }
-
- handle.cookie = it->it_lock_handle;
-
- CDEBUG(D_DLMTRACE,
- "setting l_data to inode " DFID "%p for lock %#llx\n",
- PFID(ll_inode2fid(inode)), inode, handle.cookie);
-
- md_set_lock_data(exp, &handle, inode, &it->it_lock_bits);
- it->it_lock_set = 1;
- }
-
- if (bits)
- *bits = it->it_lock_bits;
-}
-
-static inline int d_lustre_invalid(const struct dentry *dentry)
-{
- return ll_d2d(dentry)->lld_invalid;
-}
-
-/*
- * Mark dentry INVALID, if dentry refcount is zero (this is normally case for
- * ll_md_blocking_ast), unhash this dentry, and let dcache to reclaim it later;
- * else dput() of the last refcount will unhash this dentry and kill it.
- */
-static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
-{
- CDEBUG(D_DENTRY,
- "invalidate dentry %pd (%p) parent %p inode %p refc %d\n",
- dentry, dentry,
- dentry->d_parent, d_inode(dentry), d_count(dentry));
-
- spin_lock_nested(&dentry->d_lock,
- nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
- ll_d2d(dentry)->lld_invalid = 1;
- if (d_count(dentry) == 0)
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
-}
-
-static inline void d_lustre_revalidate(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- LASSERT(ll_d2d(dentry));
- ll_d2d(dentry)->lld_invalid = 0;
- spin_unlock(&dentry->d_lock);
-}
-
-int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
-int ll_layout_refresh(struct inode *inode, __u32 *gen);
-int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);
-
-int ll_xattr_init(void);
-void ll_xattr_fini(void);
-
-int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, enum cl_req_type crt);
-
-int ll_getparent(struct file *file, struct getparent __user *arg);
-
-/* lcommon_cl.c */
-int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr,
- unsigned int attr_flags);
-
-extern struct lu_env *cl_inode_fini_env;
-extern u16 cl_inode_fini_refcheck;
-
-int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
-__u32 cl_fid_build_gen(const struct lu_fid *fid);
-
-#endif /* LLITE_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
deleted file mode 100644
index e7500c53fafc..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ /dev/null
@@ -1,2666 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/llite_lib.c
- *
- * Lustre Light Super operations
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/statfs.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-
-#include <uapi/linux/lustre/lustre_ioctl.h>
-#include <lustre_ha.h>
-#include <lustre_dlm.h>
-#include <lprocfs_status.h>
-#include <lustre_disk.h>
-#include <uapi/linux/lustre/lustre_param.h>
-#include <lustre_log.h>
-#include <cl_object.h>
-#include <obd_cksum.h>
-#include "llite_internal.h"
-
-struct kmem_cache *ll_file_data_slab;
-struct dentry *llite_root;
-struct kset *llite_kset;
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
-{
- struct ll_sb_info *sbi = NULL;
- unsigned long pages;
- unsigned long lru_page_max;
- struct sysinfo si;
- class_uuid_t uuid;
- int i;
-
- sbi = kzalloc(sizeof(*sbi), GFP_NOFS);
- if (!sbi)
- return NULL;
-
- spin_lock_init(&sbi->ll_lock);
- mutex_init(&sbi->ll_lco.lco_lock);
- spin_lock_init(&sbi->ll_pp_extent_lock);
- spin_lock_init(&sbi->ll_process_lock);
- sbi->ll_rw_stats_on = 0;
-
- si_meminfo(&si);
- pages = si.totalram - si.totalhigh;
- lru_page_max = pages / 2;
-
- sbi->ll_cache = cl_cache_init(lru_page_max);
- if (!sbi->ll_cache) {
- kfree(sbi);
- return NULL;
- }
-
- sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
- SBI_DEFAULT_READAHEAD_MAX);
- sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
- sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
- SBI_DEFAULT_READAHEAD_WHOLE_MAX;
-
- ll_generate_random_uuid(uuid);
- class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
- CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
-
- sbi->ll_flags |= LL_SBI_VERBOSE;
- sbi->ll_flags |= LL_SBI_CHECKSUM;
-
- sbi->ll_flags |= LL_SBI_LRU_RESIZE;
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
-
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_r_hist.oh_lock);
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_w_hist.oh_lock);
- }
-
- /* metadata statahead is enabled by default */
- sbi->ll_sa_max = LL_SA_RPC_DEF;
- atomic_set(&sbi->ll_sa_total, 0);
- atomic_set(&sbi->ll_sa_wrong, 0);
- atomic_set(&sbi->ll_sa_running, 0);
- atomic_set(&sbi->ll_agl_total, 0);
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
-
- /* root squash */
- sbi->ll_squash.rsi_uid = 0;
- sbi->ll_squash.rsi_gid = 0;
- INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
- init_rwsem(&sbi->ll_squash.rsi_sem);
-
- sbi->ll_sb = sb;
-
- return sbi;
-}
-
-static void ll_free_sbi(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- if (sbi->ll_cache) {
- if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
- cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
- cl_cache_decref(sbi->ll_cache);
- sbi->ll_cache = NULL;
- }
-
- kfree(sbi);
-}
-
-static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
-{
- struct inode *root = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_statfs *osfs = NULL;
- struct ptlrpc_request *request = NULL;
- struct obd_connect_data *data = NULL;
- struct obd_uuid *uuid;
- struct md_op_data *op_data;
- struct lustre_md lmd;
- u64 valid;
- int size, err, checksum;
-
- obd = class_name2obd(md);
- if (!obd) {
- CERROR("MD %s: not setup or attached\n", md);
- return -EINVAL;
- }
-
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data)
- return -ENOMEM;
-
- osfs = kzalloc(sizeof(*osfs), GFP_NOFS);
- if (!osfs) {
- kfree(data);
- return -ENOMEM;
- }
-
- /* indicate the features supported by this client */
- data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
- OBD_CONNECT_ATTRFID |
- OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 |
- OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH |
- OBD_CONNECT_EINPROGRESS |
- OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK |
- OBD_CONNECT_PINGLESS |
- OBD_CONNECT_MAX_EASIZE |
- OBD_CONNECT_FLOCK_DEAD |
- OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
- OBD_CONNECT_OPEN_BY_FID |
- OBD_CONNECT_DIR_STRIPE |
- OBD_CONNECT_BULK_MBITS;
-
- if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
- data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-#ifdef CONFIG_FS_POSIX_ACL
- data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK;
-#endif
-
- if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
- /* flag mdc connection as lightweight, only used for test
- * purpose, use with care
- */
- data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
-
- data->ocd_ibits_known = MDS_INODELOCK_FULL;
- data->ocd_version = LUSTRE_VERSION_CODE;
-
- if (sb_rdonly(sb))
- data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- sbi->ll_fop = &ll_file_operations_flock;
- else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- sbi->ll_fop = &ll_file_operations;
- else
- sbi->ll_fop = &ll_file_operations_noflock;
-
- /* always ping even if server suppress_pings */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
-
- data->ocd_brw_size = MD_MAX_BRW_SIZE;
-
- err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid,
- data, NULL);
- if (err == -EBUSY) {
- LCONSOLE_ERROR_MSG(0x14f,
- "An MDT (md %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
- md);
- goto out;
- }
-
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", md, err);
- goto out;
- }
-
- sbi->ll_md_exp->exp_connect_data = *data;
-
- err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
- LUSTRE_SEQ_METADATA);
- if (err) {
- CERROR("%s: Can't init metadata layer FID infrastructure, rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_md;
- }
-
- /* For mount, we only need fs info from MDT0, and also in DNE, it
- * can make sure the client can be mounted as long as MDT0 is
- * available
- */
- err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_FOR_MDT0);
- if (err)
- goto out_md_fid;
-
- /* This needs to be after statfs to ensure connect has finished.
- * Note that "data" does NOT contain the valid connect reply.
- * If connecting to a 1.8 server there will be no LMV device, so
- * we can access the MDC export directly and exp_connect_flags will
- * be non-zero, but if accessing an upgraded 2.1 server it will
- * have the correct flags filled in.
- * XXX: fill in the LMV exp_connect_flags from MDC(s).
- */
- valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
- if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
- valid != CLIENT_CONNECT_MDT_REQD) {
- char *buf;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf) {
- err = -ENOMEM;
- goto out_md_fid;
- }
- obd_connect_flags2str(buf, PAGE_SIZE,
- valid ^ CLIENT_CONNECT_MDT_REQD, ",");
- LCONSOLE_ERROR_MSG(0x170,
- "Server %s does not support feature(s) needed for correct operation of this client (%s). Please upgrade server or downgrade client.\n",
- sbi->ll_md_exp->exp_obd->obd_name, buf);
- kfree(buf);
- err = -EPROTO;
- goto out_md_fid;
- }
-
- size = sizeof(*data);
- err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
- KEY_CONN_DATA, &size, data);
- if (err) {
- CERROR("%s: Get connect data failed: rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_md_fid;
- }
-
- LASSERT(osfs->os_bsize);
- sb->s_blocksize = osfs->os_bsize;
- sb->s_blocksize_bits = log2(osfs->os_bsize);
- sb->s_magic = LL_SUPER_MAGIC;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sbi->ll_namelen = osfs->os_namelen;
- sbi->ll_mnt.mnt = current->fs->root.mnt;
-
- if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
- !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
- LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
-
- if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
- sb->s_flags |= SB_POSIXACL;
- sbi->ll_flags |= LL_SBI_ACL;
- } else {
- LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
- sb->s_flags &= ~SB_POSIXACL;
- sbi->ll_flags &= ~LL_SBI_ACL;
- }
-
- if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
- sbi->ll_flags |= LL_SBI_64BIT_HASH;
-
- if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
- sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT;
- else
- sbi->ll_md_brw_pages = 1;
-
- if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
- sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
-
- if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
- if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
- LCONSOLE_INFO(
- "%s: disabling xattr cache due to unknown maximum xattr size.\n",
- dt);
- } else {
- sbi->ll_flags |= LL_SBI_XATTR_CACHE;
- sbi->ll_xattr_cache_enabled = 1;
- }
- }
-
- obd = class_name2obd(dt);
- if (!obd) {
- CERROR("DT %s: not setup or attached\n", dt);
- err = -ENODEV;
- goto out_md_fid;
- }
-
- data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
- OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
- OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
- OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
- OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
- OBD_CONNECT_EINPROGRESS |
- OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LAYOUTLOCK |
- OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
- OBD_CONNECT_BULK_MBITS;
-
- if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
- /* OBD_CONNECT_CKSUM should always be set, even if checksums are
- * disabled by default, because it can still be enabled on the
- * fly via /sys. As a consequence, we still need to come to an
- * agreement on the supported algorithms at connect time
- */
- data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
-
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
- data->ocd_cksum_types = OBD_CKSUM_ADLER;
- else
- data->ocd_cksum_types = cksum_types_supported_client();
- }
-
- data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-
- /* always ping even if server suppress_pings */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
-
- CDEBUG(D_RPCTRACE,
- "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
- data->ocd_connect_flags,
- data->ocd_version, data->ocd_grant);
-
- obd->obd_upcall.onu_owner = &sbi->ll_lco;
- obd->obd_upcall.onu_upcall = cl_ocd_update;
-
- data->ocd_brw_size = DT_MAX_BRW_SIZE;
-
- err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data,
- NULL);
- if (err == -EBUSY) {
- LCONSOLE_ERROR_MSG(0x150,
- "An OST (dt %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
- dt);
- goto out_md;
- } else if (err) {
- CERROR("%s: Cannot connect to %s: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
- goto out_md;
- }
-
- sbi->ll_dt_exp->exp_connect_data = *data;
-
- err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
- LUSTRE_SEQ_METADATA);
- if (err) {
- CERROR("%s: Can't init data layer FID infrastructure, rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_dt;
- }
-
- mutex_lock(&sbi->ll_lco.lco_lock);
- sbi->ll_lco.lco_flags = data->ocd_connect_flags;
- sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
- sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
- mutex_unlock(&sbi->ll_lco.lco_lock);
-
- fid_zero(&sbi->ll_root_fid);
- err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid);
- if (err) {
- CERROR("cannot mds_connect: rc = %d\n", err);
- goto out_lock_cn_cb;
- }
- if (!fid_is_sane(&sbi->ll_root_fid)) {
- CERROR("%s: Invalid root fid " DFID " during mount\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(&sbi->ll_root_fid));
- err = -EINVAL;
- goto out_lock_cn_cb;
- }
- CDEBUG(D_SUPER, "rootfid " DFID "\n", PFID(&sbi->ll_root_fid));
-
- sb->s_op = &lustre_super_operations;
- sb->s_xattr = ll_xattr_handlers;
-#if THREAD_SIZE >= 8192 /*b=17630*/
- sb->s_export_op = &lustre_export_operations;
-#endif
-
- /* make root inode
- * XXX: move this to after cbd setup?
- */
- valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
- if (sbi->ll_flags & LL_SBI_ACL)
- valid |= OBD_MD_FLACL;
-
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data) {
- err = -ENOMEM;
- goto out_lock_cn_cb;
- }
-
- op_data->op_fid1 = sbi->ll_root_fid;
- op_data->op_mode = 0;
- op_data->op_valid = valid;
-
- err = md_getattr(sbi->ll_md_exp, op_data, &request);
- kfree(op_data);
- if (err) {
- CERROR("%s: md_getattr failed for root: rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name, err);
- goto out_lock_cn_cb;
- }
-
- err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &lmd);
- if (err) {
- CERROR("failed to understand root inode md: rc = %d\n", err);
- ptlrpc_req_finished(request);
- goto out_lock_cn_cb;
- }
-
- LASSERT(fid_is_sane(&sbi->ll_root_fid));
- root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &lmd);
- md_free_lustre_md(sbi->ll_md_exp, &lmd);
- ptlrpc_req_finished(request);
-
- if (IS_ERR(root)) {
-#ifdef CONFIG_FS_POSIX_ACL
- if (lmd.posix_acl) {
- posix_acl_release(lmd.posix_acl);
- lmd.posix_acl = NULL;
- }
-#endif
- err = -EBADF;
- CERROR("lustre_lite: bad iget4 for root\n");
- goto out_root;
- }
-
- checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
- err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
- KEY_CHECKSUM, sizeof(checksum), &checksum,
- NULL);
- if (err) {
- CERROR("%s: Set checksum failed: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_root;
- }
- cl_sb_init(sb);
-
- err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
- KEY_CACHE_SET, sizeof(*sbi->ll_cache),
- sbi->ll_cache, NULL);
- if (err) {
- CERROR("%s: Set cache_set failed: rc = %d\n",
- sbi->ll_dt_exp->exp_obd->obd_name, err);
- goto out_root;
- }
-
- sb->s_root = d_make_root(root);
- if (!sb->s_root) {
- CERROR("%s: can't make root dentry\n",
- ll_get_fsname(sb, NULL, 0));
- err = -ENOMEM;
- goto out_lock_cn_cb;
- }
-
- sbi->ll_sdev_orig = sb->s_dev;
-
- /* We set sb->s_dev equal on all lustre clients in order to support
- * NFS export clustering. NFSD requires that the FSID be the same
- * on all clients.
- */
- /* s_dev is also used in lt_compare() to compare two fs, but that is
- * only a node-local comparison.
- */
- uuid = obd_get_uuid(sbi->ll_md_exp);
- if (uuid) {
- sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
- get_uuid2fsid(uuid->uuid, strlen(uuid->uuid), &sbi->ll_fsid);
- }
-
- kfree(data);
- kfree(osfs);
-
- if (llite_root) {
- err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
- if (err < 0) {
- CERROR("%s: could not register mount in debugfs: "
- "rc = %d\n", ll_get_fsname(sb, NULL, 0), err);
- err = 0;
- }
- }
-
- return err;
-out_root:
- iput(root);
-out_lock_cn_cb:
- obd_fid_fini(sbi->ll_dt_exp->exp_obd);
-out_dt:
- obd_disconnect(sbi->ll_dt_exp);
- sbi->ll_dt_exp = NULL;
-out_md_fid:
- obd_fid_fini(sbi->ll_md_exp->exp_obd);
-out_md:
- obd_disconnect(sbi->ll_md_exp);
- sbi->ll_md_exp = NULL;
-out:
- kfree(data);
- kfree(osfs);
- return err;
-}
-
-/*
- * Query the maximum EA (extended attribute) size the servers support.
- *
- * First asks the data (OST/LOV) export for KEY_MAX_EASIZE, then asks the
- * metadata (MDC) export with the same key.  NOTE(review): both calls write
- * into the same @lmmsize, so the MD value overwrites the DT value --
- * presumably the MD layer's answer already accounts for the LOV EA size;
- * verify against the obd_get_info() handlers.
- *
- * Returns 0 on success, negative errno on failure.
- */
-int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-{
-	int size, rc;
-
-	/* maximum LOV EA size from the data export */
-	size = sizeof(*lmmsize);
-	rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
-			  KEY_MAX_EASIZE, &size, lmmsize);
-	if (rc) {
-		CERROR("%s: cannot get max LOV EA size: rc = %d\n",
-		       sbi->ll_dt_exp->exp_obd->obd_name, rc);
-		return rc;
-	}
-
-	/* maximum MD EA size from the metadata export */
-	size = sizeof(int);
-	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
-			  KEY_MAX_EASIZE, &size, lmmsize);
-	if (rc)
-		CERROR("Get max mdsize error rc %d\n", rc);
-
-	return rc;
-}
-
-/**
- * Get the value of the default_easize parameter.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] sbi superblock info for this filesystem
- * \param[out] lmmsize pointer to storage location for value
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-{
-	int size, rc;
-
-	/* ask the metadata export for its cached default EA size */
-	size = sizeof(int);
-	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
-			  KEY_DEFAULT_EASIZE, &size, lmmsize);
-	if (rc)
-		CERROR("Get default mdsize error rc %d\n", rc);
-
-	return rc;
-}
-
-/**
- * Set the default_easize parameter to the given value.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] sbi superblock info for this filesystem
- * \param[in] lmmsize the size to set
- *
- * \retval 0 on success
- * \retval negative negated errno on failure
- */
-int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
-{
-	/* reject sizes below a minimal striping descriptor or above the
-	 * global EA size cap
-	 */
-	if (lmmsize < sizeof(struct lov_mds_md) ||
-	    lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
-		return -EINVAL;
-
-	return obd_set_info_async(NULL, sbi->ll_md_exp,
-				  sizeof(KEY_DEFAULT_EASIZE),
-				  KEY_DEFAULT_EASIZE,
-				  sizeof(int), &lmmsize, NULL);
-}
-
-/*
- * Teardown counterpart of client_common_fill_super(): finalize the cl_
- * layer, then drop FID state and disconnect the data and metadata
- * exports, unregistering the debugfs mountpoint in between.
- */
-static void client_common_put_super(struct super_block *sb)
-{
-	struct ll_sb_info *sbi = ll_s2sbi(sb);
-
-	cl_sb_fini(sb);
-
-	/* data (OST) side first */
-	obd_fid_fini(sbi->ll_dt_exp->exp_obd);
-	obd_disconnect(sbi->ll_dt_exp);
-	sbi->ll_dt_exp = NULL;
-
-	ldebugfs_unregister_mountpoint(sbi);
-
-	/* then the metadata (MDC) side */
-	obd_fid_fini(sbi->ll_md_exp->exp_obd);
-	obd_disconnect(sbi->ll_md_exp);
-	sbi->ll_md_exp = NULL;
-}
-
-/*
- * Pre-unmount hook: restore the original s_dev (which was overridden for
- * clustered NFS export) and wait for any running statahead threads to
- * exit before the superblock is torn down.
- */
-void ll_kill_super(struct super_block *sb)
-{
-	struct ll_sb_info *sbi;
-
-	/* not init sb ?*/
-	if (!(sb->s_flags & SB_ACTIVE))
-		return;
-
-	sbi = ll_s2sbi(sb);
-	/* we need to restore s_dev from changed for clustered NFS before
-	 * put_super because new kernels have cached s_dev and change sb->s_dev
-	 * in put_super not affected real removing devices
-	 */
-	if (sbi) {
-		sb->s_dev = sbi->ll_sdev_orig;
-		sbi->ll_umounting = 1;
-
-		/* wait running statahead threads to quit */
-		while (atomic_read(&sbi->ll_sa_running) > 0) {
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
-		}
-	}
-}
-
-/*
- * Return @fl if @data begins with the option string @opt, else 0.
- * Prefix match only; the remainder of @data (e.g. a following comma)
- * is ignored here and handled by the caller's option loop.
- */
-static inline int ll_set_opt(const char *opt, char *data, int fl)
-{
-	if (strncmp(opt, data, strlen(opt)) != 0)
-		return 0;
-	else
-		return fl;
-}
-
-/* non-client-specific mount options are parsed in lmd_parse */
-/*
- * Parse the comma-separated client mount option string @options into
- * LL_SBI_* bits in *@flags.  "no<opt>" variants clear the corresponding
- * bit; SELinux context options are recognized but ignored here (they are
- * consumed by the security layer).  Returns 0 on success or -EINVAL for
- * an unknown option.
- */
-static int ll_options(char *options, int *flags)
-{
-	int tmp;
-	char *s1 = options, *s2;
-
-	if (!options)
-		return 0;
-
-	CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
-
-	while (*s1) {
-		CDEBUG(D_SUPER, "next opt=%s\n", s1);
-		tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("noflock", s1,
-				 LL_SBI_FLOCK | LL_SBI_LOCALFLOCK);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-		/* SELinux context options: accepted but handled elsewhere */
-		tmp = ll_set_opt("context", s1, 1);
-		if (tmp)
-			goto next;
-		tmp = ll_set_opt("fscontext", s1, 1);
-		if (tmp)
-			goto next;
-		tmp = ll_set_opt("defcontext", s1, 1);
-		if (tmp)
-			goto next;
-		tmp = ll_set_opt("rootcontext", s1, 1);
-		if (tmp)
-			goto next;
-		tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-
-		tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
-		if (tmp) {
-			*flags &= ~tmp;
-			goto next;
-		}
-		tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
-		if (tmp) {
-			*flags |= tmp;
-			goto next;
-		}
-		/* anything unrecognized aborts the mount */
-		LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
-				   s1);
-		return -EINVAL;
-
-next:
-		/* Find next opt */
-		s2 = strchr(s1, ',');
-		if (!s2)
-			break;
-		s1 = s2 + 1;
-	}
-	return 0;
-}
-
-/*
- * Initialize a freshly allocated ll_inode_info.  The FID (lli_fid) must
- * already be set by the caller, and i_mode must be valid since directory
- * and non-directory inodes share a union of fields initialized
- * differently below.
- */
-void ll_lli_init(struct ll_inode_info *lli)
-{
-	lli->lli_inode_magic = LLI_INODE_MAGIC;
-	lli->lli_flags = 0;
-	spin_lock_init(&lli->lli_lock);
-	lli->lli_posix_acl = NULL;
-	/* Do not set lli_fid, it has been initialized already. */
-	fid_zero(&lli->lli_pfid);
-	lli->lli_mds_read_och = NULL;
-	lli->lli_mds_write_och = NULL;
-	lli->lli_mds_exec_och = NULL;
-	lli->lli_open_fd_read_count = 0;
-	lli->lli_open_fd_write_count = 0;
-	lli->lli_open_fd_exec_count = 0;
-	mutex_init(&lli->lli_och_mutex);
-	spin_lock_init(&lli->lli_agl_lock);
-	spin_lock_init(&lli->lli_layout_lock);
-	ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
-	lli->lli_clob = NULL;
-
-	init_rwsem(&lli->lli_xattrs_list_rwsem);
-	mutex_init(&lli->lli_xattrs_enq_lock);
-
-	/* i_mode selects which half of the dir/file union gets set up */
-	LASSERT(lli->lli_vfs_inode.i_mode != 0);
-	if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
-		/* directory-only state: readdir and statahead bookkeeping */
-		mutex_init(&lli->lli_readdir_mutex);
-		lli->lli_opendir_key = NULL;
-		lli->lli_sai = NULL;
-		spin_lock_init(&lli->lli_sa_lock);
-		lli->lli_opendir_pid = 0;
-		lli->lli_sa_enabled = 0;
-		lli->lli_def_stripe_offset = -1;
-	} else {
-		/* regular-file/symlink state: size, write-range and
-		 * glimpse bookkeeping
-		 */
-		mutex_init(&lli->lli_size_mutex);
-		lli->lli_symlink_name = NULL;
-		init_rwsem(&lli->lli_trunc_sem);
-		range_lock_tree_init(&lli->lli_write_tree);
-		init_rwsem(&lli->lli_glimpse_sem);
-		lli->lli_glimpse_time = 0;
-		INIT_LIST_HEAD(&lli->lli_agl_list);
-		lli->lli_agl_index = 0;
-		lli->lli_async_rc = 0;
-	}
-	mutex_init(&lli->lli_layout_mutex);
-}
-
-/*
- * Mount entry point for the Lustre client: allocate the per-sb info,
- * parse client mount options, process the configuration log obtained
- * from the MGS to set up the MDC/OSC devices, then call
- * client_common_fill_super() to make the connections and create the
- * root inode.  On any failure ll_put_super() is invoked to unwind.
- *
- * Returns 0 on success, negative errno on failure.
- */
-int ll_fill_super(struct super_block *sb)
-{
-	struct lustre_profile *lprof = NULL;
-	struct lustre_sb_info *lsi = s2lsi(sb);
-	struct ll_sb_info *sbi;
-	char *dt = NULL, *md = NULL;
-	char *profilenm = get_profile_name(sb);
-	struct config_llog_instance *cfg;
-	int err;
-	static atomic_t ll_bdi_num = ATOMIC_INIT(0);
-
-	CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-
-	err = ptlrpc_inc_ref();
-	if (err)
-		return err;
-
-	cfg = kzalloc(sizeof(*cfg), GFP_NOFS);
-	if (!cfg) {
-		err = -ENOMEM;
-		goto out_put;
-	}
-
-	try_module_get(THIS_MODULE);
-
-	/* client additional sb info */
-	sbi = ll_init_sbi(sb);
-	lsi->lsi_llsbi = sbi;
-	if (!sbi) {
-		module_put(THIS_MODULE);
-		kfree(cfg);
-		err = -ENOMEM;
-		goto out_put;
-	}
-
-	err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
-	if (err)
-		goto out_free;
-
-	/* each mount gets its own backing-dev-info instance */
-	err = super_setup_bdi_name(sb, "lustre-%d",
-				   atomic_inc_return(&ll_bdi_num));
-	if (err)
-		goto out_free;
-
-	/* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
-	sb->s_d_op = &ll_d_ops;
-
-	/* Generate a string unique to this super, in case some joker tries
-	 * to mount the same fs at two mount points.
-	 * Use the address of the super itself.
-	 */
-	cfg->cfg_instance = sb;
-	cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
-	cfg->cfg_callback = class_config_llog_handler;
-	/* set up client obds */
-	err = lustre_process_log(sb, profilenm, cfg);
-	if (err < 0)
-		goto out_free;
-
-	/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
-	lprof = class_get_profile(profilenm);
-	if (!lprof) {
-		LCONSOLE_ERROR_MSG(0x156,
-				   "The client profile '%s' could not be read from the MGS. Does that filesystem exist?\n",
-				   profilenm);
-		err = -EINVAL;
-		goto out_free;
-	}
-	CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
-	       lprof->lp_md, lprof->lp_dt);
-
-	/* instance-qualified obd names, e.g. "<fsname>-clilov-<sb ptr>" */
-	dt = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_dt, cfg->cfg_instance);
-	if (!dt) {
-		err = -ENOMEM;
-		goto out_free;
-	}
-
-	md = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_md, cfg->cfg_instance);
-	if (!md) {
-		err = -ENOMEM;
-		goto out_free;
-	}
-
-	/* connections, registrations, sb setup */
-	err = client_common_fill_super(sb, md, dt);
-	if (!err)
-		sbi->ll_client_common_fill_super_succeeded = 1;
-
-out_free:
-	kfree(md);
-	kfree(dt);
-	if (lprof)
-		class_put_profile(lprof);
-	if (err)
-		ll_put_super(sb);
-	else if (sbi->ll_flags & LL_SBI_VERBOSE)
-		LCONSOLE_WARN("Mounted %s\n", profilenm);
-
-	kfree(cfg);
-out_put:
-	if (err)
-		ptlrpc_dec_ref();
-	return err;
-} /* ll_fill_super */
-
-/*
- * Unmount path: end the config logs, optionally wait for unstable pages
- * to be committed (unless a forced unmount), tear down the common client
- * state and all obd devices in this superblock's group, and release the
- * per-sb info.  Also used by ll_fill_super() to unwind a failed mount,
- * so every step must tolerate partially initialized state.
- */
-void ll_put_super(struct super_block *sb)
-{
-	struct config_llog_instance cfg, params_cfg;
-	struct obd_device *obd;
-	struct lustre_sb_info *lsi = s2lsi(sb);
-	struct ll_sb_info *sbi = ll_s2sbi(sb);
-	char *profilenm = get_profile_name(sb);
-	int next, force = 1, rc = 0;
-	long ccc_count;
-
-	CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
-
-	cfg.cfg_instance = sb;
-	lustre_end_log(sb, profilenm, &cfg);
-
-	params_cfg.cfg_instance = sb;
-	lustre_end_log(sb, PARAMS_FILENAME, &params_cfg);
-
-	/* force defaults to 1 and only drops to the obd's obd_force when a
-	 * connected MD export exists to consult
-	 */
-	if (sbi->ll_md_exp) {
-		obd = class_exp2obd(sbi->ll_md_exp);
-		if (obd)
-			force = obd->obd_force;
-	}
-
-	/* Wait for unstable pages to be committed to stable storage */
-	if (!force)
-		rc = l_wait_event_abortable(sbi->ll_cache->ccc_unstable_waitq,
-					    !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr));
-
-	ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
-	if (!force && rc != -ERESTARTSYS)
-		LASSERTF(!ccc_count, "count: %li\n", ccc_count);
-
-	/* We need to set force before the lov_disconnect in
-	 * lustre_common_put_super, since l_d cleans up osc's as well.
-	 */
-	if (force) {
-		next = 0;
-		while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
-						     &next)) != NULL) {
-			obd->obd_force = force;
-		}
-	}
-
-	if (sbi->ll_client_common_fill_super_succeeded) {
-		/* Only if client_common_fill_super succeeded */
-		client_common_put_super(sb);
-	}
-
-	/* manually clean up every remaining obd in this sb's device group */
-	next = 0;
-	while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)))
-		class_manual_cleanup(obd);
-
-	if (sbi->ll_flags & LL_SBI_VERBOSE)
-		LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
-
-	if (profilenm)
-		class_del_profile(profilenm);
-
-	ll_free_sbi(sb);
-	lsi->lsi_llsbi = NULL;
-
-	lustre_common_put_super(sb);
-
-	cl_env_cache_purge(~0);
-
-	module_put(THIS_MODULE);
-
-	ptlrpc_dec_ref();
-} /* client_put_super */
-
-/*
- * Return a referenced inode (via igrab) backing the DLM lock's resource,
- * or NULL if the resource has no inode attached or the attached inode's
- * magic is invalid (in which case a rate-limited warning is logged).
- * Caller must iput() a non-NULL result.
- */
-struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
-{
-	struct inode *inode = NULL;
-
-	/* NOTE: we depend on atomic igrab() -bzzz */
-	lock_res_and_lock(lock);
-	if (lock->l_resource->lr_lvb_inode) {
-		struct ll_inode_info *lli;
-
-		lli = ll_i2info(lock->l_resource->lr_lvb_inode);
-		if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
-			inode = igrab(lock->l_resource->lr_lvb_inode);
-		} else {
-			/* bogus/dying inode: warn (quietly if it is being
-			 * freed) and return NULL
-			 */
-			inode = lock->l_resource->lr_lvb_inode;
-			LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ?  D_INFO :
-					 D_WARNING, lock,
-					 "lr_lvb_inode %p is bogus: magic %08x",
-					 lock->l_resource->lr_lvb_inode,
-					 lli->lli_inode_magic);
-			inode = NULL;
-		}
-	}
-	unlock_res_and_lock(lock);
-	return inode;
-}
-
-/*
- * Free and clear a directory inode's cached LMV striping metadata
- * (lli_lsm_md), if any.  Must only be called on directory inodes.
- */
-void ll_dir_clear_lsm_md(struct inode *inode)
-{
-	struct ll_inode_info *lli = ll_i2info(inode);
-
-	LASSERT(S_ISDIR(inode->i_mode));
-
-	if (lli->lli_lsm_md) {
-		lmv_free_memmd(lli->lli_lsm_md);
-		lli->lli_lsm_md = NULL;
-	}
-}
-
-/*
- * Get (or create) a minimal "slave" inode for one stripe of a striped
- * directory, identified by @fid.  Unlike ll_iget(), this avoids
- * ll_update_inode() because slave stripes need different initialization;
- * only mode/ops/FID/parent-FID are set on a new inode.  Returns the
- * inode or ERR_PTR(-ENOENT) if iget_locked() fails.
- */
-static struct inode *ll_iget_anon_dir(struct super_block *sb,
-				      const struct lu_fid *fid,
-				      struct lustre_md *md)
-{
-	struct ll_sb_info *sbi = ll_s2sbi(sb);
-	struct mdt_body *body = md->body;
-	struct inode *inode;
-	ino_t ino;
-
-	ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
-	inode = iget_locked(sb, ino);
-	if (!inode) {
-		CERROR("%s: failed get simple inode " DFID ": rc = -ENOENT\n",
-		       ll_get_fsname(sb, NULL, 0), PFID(fid));
-		return ERR_PTR(-ENOENT);
-	}
-
-	if (inode->i_state & I_NEW) {
-		struct ll_inode_info *lli = ll_i2info(inode);
-		struct lmv_stripe_md *lsm = md->lmv;
-
-		/* take the file-type bits from the master's mdt_body */
-		inode->i_mode = (inode->i_mode & ~S_IFMT) |
-				(body->mbo_mode & S_IFMT);
-		LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode " DFID "\n",
-			 PFID(fid));
-
-		LTIME_S(inode->i_mtime) = 0;
-		LTIME_S(inode->i_atime) = 0;
-		LTIME_S(inode->i_ctime) = 0;
-		inode->i_rdev = 0;
-
-		inode->i_op = &ll_dir_inode_operations;
-		inode->i_fop = &ll_dir_operations;
-		lli->lli_fid = *fid;
-		ll_lli_init(lli);
-
-		LASSERT(lsm);
-		/* master object FID */
-		lli->lli_pfid = body->mbo_fid1;
-		CDEBUG(D_INODE, "lli %p slave " DFID " master " DFID "\n",
-		       lli, PFID(fid), PFID(&lli->lli_pfid));
-		unlock_new_inode(inode);
-	}
-
-	return inode;
-}
-
-/*
- * Populate lmo_root for every stripe of a striped directory: for a
- * migrating directory stripe 0 shares @inode itself, every other stripe
- * gets an anonymous slave inode via ll_iget_anon_dir().  On failure the
- * failed slot is cleared and a negative errno returned (already-created
- * slaves are left for the caller/lsm teardown to release).
- */
-static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
-{
-	struct lmv_stripe_md *lsm = md->lmv;
-	struct lu_fid *fid;
-	int i;
-
-	LASSERT(lsm);
-	/*
-	 * XXX sigh, this lsm_root initialization should be in
-	 * LMV layer, but it needs ll_iget right now, so we
-	 * put this here right now.
-	 */
-	for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
-		fid = &lsm->lsm_md_oinfo[i].lmo_fid;
-		LASSERT(!lsm->lsm_md_oinfo[i].lmo_root);
-		/* Unfortunately ll_iget will call ll_update_inode,
-		 * where the initialization of slave inode is slightly
-		 * different, so it reset lsm_md to NULL to avoid
-		 * initializing lsm for slave inode.
-		 */
-		/* For migrating inode, master stripe and master object will
-		 * be same, so we only need assign this inode
-		 */
-		if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i)
-			lsm->lsm_md_oinfo[i].lmo_root = inode;
-		else
-			lsm->lsm_md_oinfo[i].lmo_root =
-				ll_iget_anon_dir(inode->i_sb, fid, md);
-		if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
-			int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
-
-			lsm->lsm_md_oinfo[i].lmo_root = NULL;
-			return rc;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Field-by-field equality of two LMV stripe-md descriptors (magic,
- * stripe count, master MDT index, hash type, layout version, pool name).
- *
- * NOTE(review): ll_update_lsm_md() below calls lsm_md_eq(), not this
- * helper.  If lsm_md_eq() is a header-provided function this inline may
- * be an unused leftover; verify against lustre_lmv.h before removing.
- */
-static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1,
-				const struct lmv_stripe_md *lsm_md2)
-{
-	return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic &&
-	       lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count &&
-	       lsm_md1->lsm_md_master_mdt_index ==
-	       lsm_md2->lsm_md_master_mdt_index &&
-	       lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type &&
-	       lsm_md1->lsm_md_layout_version ==
-	       lsm_md2->lsm_md_layout_version &&
-	       !strcmp(lsm_md1->lsm_md_pool_name,
-		       lsm_md2->lsm_md_pool_name);
-}
-
-/*
- * Reconcile a directory inode's cached LMV layout (lli_lsm_md) with the
- * striping information in @md from an MDS reply.
- *
- * Cases handled:
- *  - reply has no lsm: keep the cache, except a cached MIGRATION layout
- *    is freed (migration finished);
- *  - no cached lsm: take ownership of md->lmv (md->lmv is NULLed so the
- *    caller's lustre_md teardown won't free it), initialize the stripe
- *    slave inodes and fold merged attributes back into md->body;
- *  - both present but different: layout mismatch, log details of both
- *    layouts and return -EIO.
- *
- * Returns 0 on success or a negative errno.
- */
-static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
-{
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct lmv_stripe_md *lsm = md->lmv;
-	int rc;
-
-	LASSERT(S_ISDIR(inode->i_mode));
-	CDEBUG(D_INODE, "update lsm %p of " DFID "\n", lli->lli_lsm_md,
-	       PFID(ll_inode2fid(inode)));
-
-	/* no striped information from request. */
-	if (!lsm) {
-		if (!lli->lli_lsm_md) {
-			return 0;
-		} else if (lli->lli_lsm_md->lsm_md_hash_type &
-			   LMV_HASH_FLAG_MIGRATION) {
-			/*
-			 * migration is done, the temporay MIGRATE layout has
-			 * been removed
-			 */
-			CDEBUG(D_INODE, DFID " finish migration.\n",
-			       PFID(ll_inode2fid(inode)));
-			lmv_free_memmd(lli->lli_lsm_md);
-			lli->lli_lsm_md = NULL;
-			return 0;
-		}
-		/*
-		 * The lustre_md from req does not include stripeEA,
-		 * see ll_md_setattr
-		 */
-		return 0;
-	}
-
-	/* set the directory layout */
-	if (!lli->lli_lsm_md) {
-		struct cl_attr *attr;
-
-		rc = ll_init_lsm_md(inode, md);
-		if (rc)
-			return rc;
-
-		/*
-		 * set lsm_md to NULL, so the following free lustre_md
-		 * will not free this lsm
-		 */
-		md->lmv = NULL;
-		lli->lli_lsm_md = lsm;
-
-		attr = kzalloc(sizeof(*attr), GFP_NOFS);
-		if (!attr)
-			return -ENOMEM;
-
-		/* validate the lsm */
-		rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
-				   ll_md_blocking_ast);
-		if (rc) {
-			kfree(attr);
-			return rc;
-		}
-
-		/* fold the merged stripe attributes back into the reply
-		 * body so the normal inode-update path picks them up
-		 */
-		if (md->body->mbo_valid & OBD_MD_FLNLINK)
-			md->body->mbo_nlink = attr->cat_nlink;
-		if (md->body->mbo_valid & OBD_MD_FLSIZE)
-			md->body->mbo_size = attr->cat_size;
-		if (md->body->mbo_valid & OBD_MD_FLATIME)
-			md->body->mbo_atime = attr->cat_atime;
-		if (md->body->mbo_valid & OBD_MD_FLCTIME)
-			md->body->mbo_ctime = attr->cat_ctime;
-		if (md->body->mbo_valid & OBD_MD_FLMTIME)
-			md->body->mbo_mtime = attr->cat_mtime;
-
-		kfree(attr);
-
-		CDEBUG(D_INODE, "Set lsm %p magic %x to " DFID "\n", lsm,
-		       lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
-		return 0;
-	}
-
-	/* Compare the old and new stripe information */
-	if (!lsm_md_eq(lli->lli_lsm_md, lsm)) {
-		struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
-		int idx;
-
-		CERROR("%s: inode " DFID "(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
-		       ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
-		       inode, lsm, old_lsm,
-		       lsm->lsm_md_magic, old_lsm->lsm_md_magic,
-		       lsm->lsm_md_stripe_count,
-		       old_lsm->lsm_md_stripe_count,
-		       lsm->lsm_md_master_mdt_index,
-		       old_lsm->lsm_md_master_mdt_index,
-		       lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
-		       lsm->lsm_md_layout_version,
-		       old_lsm->lsm_md_layout_version,
-		       lsm->lsm_md_pool_name,
-		       old_lsm->lsm_md_pool_name);
-
-		for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) {
-			CERROR("%s: sub FIDs in old lsm idx %d, old: " DFID "\n",
-			       ll_get_fsname(inode->i_sb, NULL, 0), idx,
-			       PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
-		}
-
-		for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) {
-			CERROR("%s: sub FIDs in new lsm idx %d, new: " DFID "\n",
-			       ll_get_fsname(inode->i_sb, NULL, 0), idx,
-			       PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
-		}
-
-		return -EIO;
-	}
-
-	return 0;
-}
-
-/*
- * VFS ->evict_inode helper: release all Lustre state attached to an
- * inode being dropped from the icache -- pending MDS open handles,
- * symlink name, xattr cache, POSIX ACLs, directory striping, and
- * finally the cl_object.  The LASSERTs document invariants that must
- * already hold (open counts drained, statahead torn down).
- */
-void ll_clear_inode(struct inode *inode)
-{
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ll_sb_info *sbi = ll_i2sbi(inode);
-
-	CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
-	       PFID(ll_inode2fid(inode)), inode);
-
-	if (S_ISDIR(inode->i_mode)) {
-		/* these should have been cleared in ll_file_release */
-		LASSERT(!lli->lli_opendir_key);
-		LASSERT(!lli->lli_sai);
-		LASSERT(lli->lli_opendir_pid == 0);
-	}
-
-	md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
-
-	LASSERT(!lli->lli_open_fd_write_count);
-	LASSERT(!lli->lli_open_fd_read_count);
-	LASSERT(!lli->lli_open_fd_exec_count);
-
-	/* close any MDS open handles still cached on the inode */
-	if (lli->lli_mds_write_och)
-		ll_md_real_close(inode, FMODE_WRITE);
-	if (lli->lli_mds_exec_och)
-		ll_md_real_close(inode, FMODE_EXEC);
-	if (lli->lli_mds_read_och)
-		ll_md_real_close(inode, FMODE_READ);
-
-	if (S_ISLNK(inode->i_mode)) {
-		kfree(lli->lli_symlink_name);
-		lli->lli_symlink_name = NULL;
-	}
-
-	ll_xattr_cache_destroy(inode);
-
-#ifdef CONFIG_FS_POSIX_ACL
-	forget_all_cached_acls(inode);
-	if (lli->lli_posix_acl) {
-		posix_acl_release(lli->lli_posix_acl);
-		lli->lli_posix_acl = NULL;
-	}
-#endif
-	lli->lli_inode_magic = LLI_INODE_DEAD;
-
-	if (S_ISDIR(inode->i_mode))
-		ll_dir_clear_lsm_md(inode);
-	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
-		LASSERT(list_empty(&lli->lli_agl_list));
-
-	/*
-	 * XXX This has to be done before lsm is freed below, because
-	 * cl_object still uses inode lsm.
-	 */
-	cl_inode_fini(inode);
-}
-
-/* attr bits that explicitly set (rather than touch) timestamps */
-#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
-
-/*
- * Send the setattr RPC to the MDS and apply the reply to the local
- * inode.  An -ENOENT reply (racing unlink) on a non-regular,
- * non-directory inode is papered over by applying the change locally.
- * @op_data->op_attr carries the attributes; the TIMES_SET/SIZE bits are
- * masked out of the local simple_setattr() since size is handled later
- * by cl_setattr_ost().
- */
-static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
-{
-	struct lustre_md md;
-	struct inode *inode = d_inode(dentry);
-	struct ll_sb_info *sbi = ll_i2sbi(inode);
-	struct ptlrpc_request *request = NULL;
-	int rc, ia_valid;
-
-	op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
-				     LUSTRE_OPC_ANY, NULL);
-	if (IS_ERR(op_data))
-		return PTR_ERR(op_data);
-
-	rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
-	if (rc) {
-		ptlrpc_req_finished(request);
-		if (rc == -ENOENT) {
-			clear_nlink(inode);
-			/* Unlinked special device node? Or just a race?
-			 * Pretend we did everything.
-			 */
-			if (!S_ISREG(inode->i_mode) &&
-			    !S_ISDIR(inode->i_mode)) {
-				ia_valid = op_data->op_attr.ia_valid;
-				op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
-				rc = simple_setattr(dentry, &op_data->op_attr);
-				op_data->op_attr.ia_valid = ia_valid;
-			}
-		} else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
-			CERROR("md_setattr fails: rc = %d\n", rc);
-		}
-		return rc;
-	}
-
-	rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
-			      sbi->ll_md_exp, &md);
-	if (rc) {
-		ptlrpc_req_finished(request);
-		return rc;
-	}
-
-	ia_valid = op_data->op_attr.ia_valid;
-	/* inode size will be in cl_setattr_ost, can't do it now since dirty
-	 * cache is not cleared yet.
-	 */
-	op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
-	if (S_ISREG(inode->i_mode))
-		inode_lock(inode);
-	rc = simple_setattr(dentry, &op_data->op_attr);
-	if (S_ISREG(inode->i_mode))
-		inode_unlock(inode);
-	op_data->op_attr.ia_valid = ia_valid;
-
-	rc = ll_update_inode(inode, &md);
-	ptlrpc_req_finished(request);
-
-	return rc;
-}
-
-/* If this inode has objects allocated to it (lsm != NULL), then the OST
- * object(s) determine the file size and mtime. Otherwise, the MDS will
- * keep these values until such a time that objects are allocated for it.
- * We do the MDS operations first, as it is checking permissions for us.
- * We don't to the MDS RPC if there is nothing that we want to store there,
- * otherwise there is no harm in updating mtime/atime on the MDS if we are
- * going to do an RPC anyways.
- *
- * If we are doing a truncate, we will send the mtime and ctime updates
- * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
- * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
- * at the same time.
- *
- * In case of HSMimport, we only set attr on MDS.
- */
-/*
- * NOTE(review): the caller is expected to hold the inode lock for
- * regular files -- it is dropped here before the RPCs and re-taken in
- * the out: path; verify against ll_setattr()/callers.
- */
-int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
-{
-	struct inode *inode = d_inode(dentry);
-	struct ll_inode_info *lli = ll_i2info(inode);
-	struct md_op_data *op_data = NULL;
-	int rc = 0;
-
-	CDEBUG(D_VFSTRACE, "%s: setattr inode " DFID "(%p) from %llu to %llu, valid %x, hsm_import %d\n",
-	       ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode,
-	       i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import);
-
-	if (attr->ia_valid & ATTR_SIZE) {
-		/* Check new size against VFS/VM file size limit and rlimit */
-		rc = inode_newsize_ok(inode, attr->ia_size);
-		if (rc)
-			return rc;
-
-		/* The maximum Lustre file size is variable, based on the
-		 * OST maximum object size and number of stripes.  This
-		 * needs another check in addition to the VFS check above.
-		 */
-		if (attr->ia_size > ll_file_maxbytes(inode)) {
-			CDEBUG(D_INODE, "file " DFID " too large %llu > %llu\n",
-			       PFID(&lli->lli_fid), attr->ia_size,
-			       ll_file_maxbytes(inode));
-			return -EFBIG;
-		}
-
-		attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
-	}
-
-	/* POSIX: check before ATTR_*TIME_SET set (from setattr_prepare) */
-	if (attr->ia_valid & TIMES_SET_FLAGS) {
-		if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
-		    !capable(CAP_FOWNER))
-			return -EPERM;
-	}
-
-	/* We mark all of the fields "set" so MDS/OST does not re-set them */
-	if (attr->ia_valid & ATTR_CTIME) {
-		attr->ia_ctime = current_time(inode);
-		attr->ia_valid |= ATTR_CTIME_SET;
-	}
-	if (!(attr->ia_valid & ATTR_ATIME_SET) &&
-	    (attr->ia_valid & ATTR_ATIME)) {
-		attr->ia_atime = current_time(inode);
-		attr->ia_valid |= ATTR_ATIME_SET;
-	}
-	if (!(attr->ia_valid & ATTR_MTIME_SET) &&
-	    (attr->ia_valid & ATTR_MTIME)) {
-		attr->ia_mtime = current_time(inode);
-		attr->ia_valid |= ATTR_MTIME_SET;
-	}
-
-	if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
-		CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n",
-		       LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
-		       (s64)ktime_get_real_seconds());
-
-	/* drop the inode lock across the RPCs; re-taken in the out: path */
-	if (S_ISREG(inode->i_mode))
-		inode_unlock(inode);
-
-	/*
-	 * We always do an MDS RPC, even if we're only changing the size;
-	 * only the MDS knows whether truncate() should fail with -ETXTBUSY
-	 */
-	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
-	if (!op_data) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
-		/*
-		 * If we are changing file size, file content is
-		 * modified, flag it.
-		 */
-		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-		op_data->op_bias |= MDS_DATA_MODIFIED;
-		clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
-	}
-
-	op_data->op_attr = *attr;
-
-	rc = ll_md_setattr(dentry, op_data);
-	if (rc)
-		goto out;
-
-	if (!S_ISREG(inode->i_mode) || hsm_import) {
-		rc = 0;
-		goto out;
-	}
-
-	if (attr->ia_valid & (ATTR_SIZE |
-			      ATTR_ATIME | ATTR_ATIME_SET |
-			      ATTR_MTIME | ATTR_MTIME_SET)) {
-		/* For truncate and utimes sending attributes to OSTs, setting
-		 * mtime/atime to the past will be performed under PW [0:EOF]
-		 * extent lock (new_size:EOF for truncate). It may seem
-		 * excessive to send mtime/atime updates to OSTs when not
-		 * setting times to past, but it is necessary due to possible
-		 * time de-synchronization between MDT inode and OST objects
-		 */
-		rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0);
-	}
-
-	/*
-	 * If the file was restored, it needs to set dirty flag.
-	 *
-	 * We've already sent MDS_DATA_MODIFIED flag in
-	 * ll_md_setattr() for truncate. However, the MDT refuses to
-	 * set the HS_DIRTY flag on released files, so we have to set
-	 * it again if the file has been restored. Please check how
-	 * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini().
-	 *
-	 * Please notice that if the file is not released, the previous
-	 * MDS_DATA_MODIFIED has taken effect and usually
-	 * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()).
-	 * This way we can save an RPC for common open + trunc
-	 * operation.
-	 */
-	if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) {
-		struct hsm_state_set hss = {
-			.hss_valid = HSS_SETMASK,
-			.hss_setmask = HS_DIRTY,
-		};
-		int rc2;
-
-		rc2 = ll_hsm_state_set(inode, &hss);
-		/*
-		 * truncate and write can happen at the same time, so that
-		 * the file can be set modified even though the file is not
-		 * restored from released state, and ll_hsm_state_set() is
-		 * not applicable for the file, and rc2 < 0 is normal in this
-		 * case.
-		 */
-		if (rc2 < 0)
-			CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n",
-			       PFID(ll_inode2fid(inode)), rc2);
-	}
-
-out:
-	if (op_data)
-		ll_finish_md_op_data(op_data);
-
-	if (S_ISREG(inode->i_mode)) {
-		inode_lock(inode);
-		if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
-			inode_dio_wait(inode);
-	}
-
-	ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ?
-			   LPROC_LL_TRUNC : LPROC_LL_SETATTR, 1);
-
-	return rc;
-}
-
-/*
- * VFS ->setattr entry: massage ia_valid before delegating to
- * ll_setattr_raw() (with hsm_import = false).  The fixups mirror what
- * the generic VFS setattr path would do: allow an owner-override for a
- * combined ctime/size/mode change, force the change when a truncate
- * must strip setuid/setgid, and add the ATTR_KILL_S[UG]ID bits when a
- * mode change clears those bits.
- */
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
-	int mode = d_inode(de)->i_mode;
-
-	if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) ==
-	    (ATTR_CTIME | ATTR_SIZE | ATTR_MODE))
-		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-
-	/* truncate that drops setuid, or drops setgid on a group-executable
-	 * file, must not be silently skipped
-	 */
-	if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
-	     (ATTR_SIZE | ATTR_MODE)) &&
-	    (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
-	     (((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
-	      !(attr->ia_mode & S_ISGID))))
-		attr->ia_valid |= ATTR_FORCE;
-
-	if ((attr->ia_valid & ATTR_MODE) &&
-	    (mode & S_ISUID) &&
-	    !(attr->ia_mode & S_ISUID) &&
-	    !(attr->ia_valid & ATTR_KILL_SUID))
-		attr->ia_valid |= ATTR_KILL_SUID;
-
-	if ((attr->ia_valid & ATTR_MODE) &&
-	    ((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) &&
-	    !(attr->ia_mode & S_ISGID) &&
-	    !(attr->ia_valid & ATTR_KILL_SGID))
-		attr->ia_valid |= ATTR_KILL_SGID;
-
-	return ll_setattr_raw(de, attr, false);
-}
-
-/*
- * Gather filesystem statistics: inode numbers come from the MDC statfs,
- * block numbers from the aggregated OSC statfs.  If fewer OST objects
- * are free than MDS inodes, the reported inode totals are scaled down
- * so "inodes in use" stays accurate.  @max_age allows cached results;
- * OBD_STATFS_NODELAY is added for the OST query when the lazystatfs
- * mount option is set.
- */
-int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-		       __u64 max_age, __u32 flags)
-{
-	struct ll_sb_info *sbi = ll_s2sbi(sb);
-	struct obd_statfs obd_osfs;
-	int rc;
-
-	rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
-	if (rc) {
-		CERROR("md_statfs fails: rc = %d\n", rc);
-		return rc;
-	}
-
-	osfs->os_type = sb->s_magic;
-
-	CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
-	       osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,
-	       osfs->os_files);
-
-	if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
-		flags |= OBD_STATFS_NODELAY;
-
-	rc = obd_statfs_rqset(sbi->ll_dt_exp, &obd_osfs, max_age, flags);
-	if (rc) {
-		CERROR("obd_statfs fails: rc = %d\n", rc);
-		return rc;
-	}
-
-	CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
-	       obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
-	       obd_osfs.os_files);
-
-	/* block accounting comes from the OSTs, not the MDT */
-	osfs->os_bsize = obd_osfs.os_bsize;
-	osfs->os_blocks = obd_osfs.os_blocks;
-	osfs->os_bfree = obd_osfs.os_bfree;
-	osfs->os_bavail = obd_osfs.os_bavail;
-
-	/* If we don't have as many objects free on the OST as inodes
-	 * on the MDS, we reduce the total number of inodes to
-	 * compensate, so that the "inodes in use" number is correct.
-	 */
-	if (obd_osfs.os_ffree < osfs->os_ffree) {
-		osfs->os_files = (osfs->os_files - osfs->os_ffree) +
-				 obd_osfs.os_ffree;
-		osfs->os_ffree = obd_osfs.os_ffree;
-	}
-
-	return rc;
-}
-
-/*
- * VFS ->statfs: fetch (possibly cached) cluster statistics via
- * ll_statfs_internal() and repack them into struct kstatfs, widening
- * f_bsize on 32-bit kernels so block counts fit in an unsigned long.
- */
-int ll_statfs(struct dentry *de, struct kstatfs *sfs)
-{
-	struct super_block *sb = de->d_sb;
-	struct obd_statfs osfs;
-	int rc;
-
-	CDEBUG(D_VFSTRACE, "VFS Op: at %llu jiffies\n", get_jiffies_64());
-	ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
-
-	/* Some amount of caching on the client is allowed */
-	rc = ll_statfs_internal(sb, &osfs,
-				cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
-				0);
-	if (rc)
-		return rc;
-
-	statfs_unpack(sfs, &osfs);
-
-	/* We need to downshift for all 32-bit kernels, because we can't
-	 * tell if the kernel is being called via sys_statfs64() or not.
-	 * Stop before overflowing f_bsize - in which case it is better
-	 * to just risk EOVERFLOW if caller is using old sys_statfs().
-	 */
-	if (sizeof(long) < 8) {
-		while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
-			sfs->f_bsize <<= 1;
-
-			osfs.os_blocks >>= 1;
-			osfs.os_bfree >>= 1;
-			osfs.os_bavail >>= 1;
-		}
-	}
-
-	sfs->f_blocks = osfs.os_blocks;
-	sfs->f_bfree = osfs.os_bfree;
-	sfs->f_bavail = osfs.os_bavail;
-	/* same fsid on every client, derived from the MDC UUID at mount */
-	sfs->f_fsid = ll_s2sbi(sb)->ll_fsid;
-	return 0;
-}
-
-/*
- * Take the per-inode size mutex.  Only valid for non-directories:
- * lli_size_mutex lives in the file half of the ll_inode_info union
- * (see ll_lli_init()).
- */
-void ll_inode_size_lock(struct inode *inode)
-{
-	struct ll_inode_info *lli;
-
-	LASSERT(!S_ISDIR(inode->i_mode));
-
-	lli = ll_i2info(inode);
-	mutex_lock(&lli->lli_size_mutex);
-}
-
-/* Release the per-inode size mutex taken by ll_inode_size_lock(). */
-void ll_inode_size_unlock(struct inode *inode)
-{
-	struct ll_inode_info *lli;
-
-	lli = ll_i2info(inode);
-	mutex_unlock(&lli->lli_size_mutex);
-}
-
-int ll_update_inode(struct inode *inode, struct lustre_md *md)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body = md->body;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- if (body->mbo_valid & OBD_MD_FLEASIZE)
- cl_file_inode_init(inode, md);
-
- if (S_ISDIR(inode->i_mode)) {
- int rc;
-
- rc = ll_update_lsm_md(inode, md);
- if (rc)
- return rc;
- }
-
-#ifdef CONFIG_FS_POSIX_ACL
- if (body->mbo_valid & OBD_MD_FLACL) {
- spin_lock(&lli->lli_lock);
- if (lli->lli_posix_acl)
- posix_acl_release(lli->lli_posix_acl);
- lli->lli_posix_acl = md->posix_acl;
- spin_unlock(&lli->lli_lock);
- }
-#endif
- inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API);
- inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
-
- if (body->mbo_valid & OBD_MD_FLATIME) {
- if (body->mbo_atime > LTIME_S(inode->i_atime))
- LTIME_S(inode->i_atime) = body->mbo_atime;
- lli->lli_atime = body->mbo_atime;
- }
- if (body->mbo_valid & OBD_MD_FLMTIME) {
- if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE,
- "setting ino %lu mtime from %lu to %llu\n",
- inode->i_ino, LTIME_S(inode->i_mtime),
- body->mbo_mtime);
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
- lli->lli_mtime = body->mbo_mtime;
- }
- if (body->mbo_valid & OBD_MD_FLCTIME) {
- if (body->mbo_ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
- lli->lli_ctime = body->mbo_ctime;
- }
- if (body->mbo_valid & OBD_MD_FLMODE)
- inode->i_mode = (inode->i_mode & S_IFMT) |
- (body->mbo_mode & ~S_IFMT);
- if (body->mbo_valid & OBD_MD_FLTYPE)
- inode->i_mode = (inode->i_mode & ~S_IFMT) |
- (body->mbo_mode & S_IFMT);
- LASSERT(inode->i_mode != 0);
- if (S_ISREG(inode->i_mode))
- inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
- LL_MAX_BLKSIZE_BITS);
- else
- inode->i_blkbits = inode->i_sb->s_blocksize_bits;
- if (body->mbo_valid & OBD_MD_FLUID)
- inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
- if (body->mbo_valid & OBD_MD_FLGID)
- inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
- if (body->mbo_valid & OBD_MD_FLFLAGS)
- inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
- if (body->mbo_valid & OBD_MD_FLNLINK)
- set_nlink(inode, body->mbo_nlink);
- if (body->mbo_valid & OBD_MD_FLRDEV)
- inode->i_rdev = old_decode_dev(body->mbo_rdev);
-
- if (body->mbo_valid & OBD_MD_FLID) {
- /* FID shouldn't be changed! */
- if (fid_is_sane(&lli->lli_fid)) {
- LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
- "Trying to change FID " DFID " to the " DFID ", inode " DFID "(%p)\n",
- PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
- PFID(ll_inode2fid(inode)), inode);
- } else {
- lli->lli_fid = body->mbo_fid1;
- }
- }
-
- LASSERT(fid_seq(&lli->lli_fid) != 0);
-
- if (body->mbo_valid & OBD_MD_FLSIZE) {
- i_size_write(inode, body->mbo_size);
-
- CDEBUG(D_VFSTRACE, "inode=" DFID ", updating i_size %llu\n",
- PFID(ll_inode2fid(inode)),
- (unsigned long long)body->mbo_size);
-
- if (body->mbo_valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->mbo_blocks;
- }
-
- if (body->mbo_valid & OBD_MD_TSTATE) {
- if (body->mbo_t_state & MS_RESTORE)
- set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
- }
-
- return 0;
-}
-
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
- struct lustre_md *md = opaque;
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(&lli->lli_fid), inode);
-
- /* Core attributes from the MDS first. This is a new inode, and
- * the VFS doesn't zero times in the core inode so we have to do
- * it ourselves. They will be overwritten by either MDS or OST
- * attributes - we just need to make sure they aren't newer.
- */
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- inode->i_rdev = 0;
- rc = ll_update_inode(inode, md);
- if (rc)
- return rc;
-
- /* OIDEBUG(inode); */
-
- if (S_ISREG(inode->i_mode)) {
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- inode->i_op = &ll_file_inode_operations;
- inode->i_fop = sbi->ll_fop;
- inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &ll_fast_symlink_inode_operations;
- } else {
- inode->i_op = &ll_special_inode_operations;
-
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
- }
-
- return 0;
-}
-
-void ll_delete_inode(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (S_ISREG(inode->i_mode) && lli->lli_clob)
- /* discard all dirty pages before truncating them, required by
- * osc_extent implementation at LU-1030.
- */
- cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
- CL_FSYNC_LOCAL, 1);
-
- truncate_inode_pages_final(&inode->i_data);
-
- LASSERTF(!inode->i_data.nrpages,
- "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
-
- ll_clear_inode(inode);
- clear_inode(inode);
-}
-
-int ll_iocontrol(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- int rc, flags = 0;
-
- switch (cmd) {
- case FSFILT_IOC_GETFLAGS: {
- struct mdt_body *body;
- struct md_op_data *op_data;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
- 0, 0, LUSTRE_OPC_ANY,
- NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_FLFLAGS;
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc) {
- CERROR("%s: failure inode " DFID ": rc = %d\n",
- sbi->ll_md_exp->exp_obd->obd_name,
- PFID(ll_inode2fid(inode)), rc);
- return -abs(rc);
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
- flags = body->mbo_flags;
-
- ptlrpc_req_finished(req);
-
- return put_user(flags, (int __user *)arg);
- }
- case FSFILT_IOC_SETFLAGS: {
- struct md_op_data *op_data;
- struct cl_object *obj;
- struct iattr *attr;
-
- if (get_user(flags, (int __user *)arg))
- return -EFAULT;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_attr_flags = flags;
- op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req);
- ll_finish_md_op_data(op_data);
- ptlrpc_req_finished(req);
- if (rc)
- return rc;
-
- inode->i_flags = ll_ext_to_inode_flags(flags);
-
- obj = ll_i2info(inode)->lli_clob;
- if (!obj)
- return 0;
-
- attr = kzalloc(sizeof(*attr), GFP_NOFS);
- if (!attr)
- return -ENOMEM;
-
- attr->ia_valid = ATTR_ATTR_FLAG;
- rc = cl_setattr_ost(obj, attr, flags);
- kfree(attr);
- return rc;
- }
- default:
- return -ENOSYS;
- }
-
- return 0;
-}
-
-int ll_flush_ctx(struct inode *inode)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- CDEBUG(D_SEC, "flush context for user %d\n",
- from_kuid(&init_user_ns, current_uid()));
-
- obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
- 0, NULL, NULL);
- obd_set_info_async(NULL, sbi->ll_dt_exp,
- sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
- 0, NULL, NULL);
- return 0;
-}
-
-/* umount -f client means force down, don't save state */
-void ll_umount_begin(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_ioctl_data *ioc_data;
- int cnt = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
- sb->s_count, atomic_read(&sb->s_active));
-
- obd = class_exp2obd(sbi->ll_md_exp);
- if (!obd) {
- CERROR("Invalid MDC connection handle %#llx\n",
- sbi->ll_md_exp->exp_handle.h_cookie);
- return;
- }
- obd->obd_force = 1;
-
- obd = class_exp2obd(sbi->ll_dt_exp);
- if (!obd) {
- CERROR("Invalid LOV connection handle %#llx\n",
- sbi->ll_dt_exp->exp_handle.h_cookie);
- return;
- }
- obd->obd_force = 1;
-
- ioc_data = kzalloc(sizeof(*ioc_data), GFP_NOFS);
- if (ioc_data) {
- obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp,
- sizeof(*ioc_data), ioc_data, NULL);
-
- obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp,
- sizeof(*ioc_data), ioc_data, NULL);
-
- kfree(ioc_data);
- }
-
- /* Really, we'd like to wait until there are no requests outstanding,
- * and then continue. For now, we just periodically checking for vfs
- * to decrement mnt_cnt and hope to finish it within 10sec.
- */
- while (cnt < 10 && !may_umount(sbi->ll_mnt.mnt)) {
- schedule_timeout_uninterruptible(HZ);
- cnt++;
- }
-
- schedule();
-}
-
-int ll_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char *profilenm = get_profile_name(sb);
- int err;
- __u32 read_only;
-
- if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
- read_only = *flags & SB_RDONLY;
- err = obd_set_info_async(NULL, sbi->ll_md_exp,
- sizeof(KEY_READ_ONLY),
- KEY_READ_ONLY, sizeof(read_only),
- &read_only, NULL);
- if (err) {
- LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
- profilenm, read_only ?
- "read-only" : "read-write", err);
- return err;
- }
-
- if (read_only)
- sb->s_flags |= SB_RDONLY;
- else
- sb->s_flags &= ~SB_RDONLY;
-
- if (sbi->ll_flags & LL_SBI_VERBOSE)
- LCONSOLE_WARN("Remounted %s %s\n", profilenm,
- read_only ? "read-only" : "read-write");
- }
- return 0;
-}
-
-/**
- * Cleanup the open handle that is cached on MDT-side.
- *
- * For open case, the client side open handling thread may hit error
- * after the MDT grant the open. Under such case, the client should
- * send close RPC to the MDT as cleanup; otherwise, the open handle
- * on the MDT will be leaked there until the client umount or evicted.
- *
- * In further, if someone unlinked the file, because the open handle
- * holds the reference on such file/object, then it will block the
- * subsequent threads that want to locate such object via FID.
- *
- * \param[in] sb super block for this file-system
- * \param[in] open_req pointer to the original open request
- */
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
-{
- struct mdt_body *body;
- struct md_op_data *op_data;
- struct ptlrpc_request *close_req = NULL;
- struct obd_export *exp = ll_s2sbi(sb)->ll_md_exp;
-
- body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return;
-
- op_data->op_fid1 = body->mbo_fid1;
- op_data->op_handle = body->mbo_handle;
- op_data->op_mod_time = get_seconds();
- md_close(exp, op_data, NULL, &close_req);
- ptlrpc_req_finished(close_req);
- ll_finish_md_op_data(op_data);
-}
-
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *sb, struct lookup_intent *it)
-{
- struct ll_sb_info *sbi = NULL;
- struct lustre_md md = { NULL };
- int rc;
-
- LASSERT(*inode || sb);
- sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
- rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
- if (rc)
- goto cleanup;
-
- if (*inode) {
- rc = ll_update_inode(*inode, &md);
- if (rc)
- goto out;
- } else {
- LASSERT(sb);
-
- /*
- * At this point server returns to client's same fid as client
- * generated for creating. So using ->fid1 is okay here.
- */
- if (!fid_is_sane(&md.body->mbo_fid1)) {
- CERROR("%s: Fid is insane " DFID "\n",
- ll_get_fsname(sb, NULL, 0),
- PFID(&md.body->mbo_fid1));
- rc = -EINVAL;
- goto out;
- }
-
- *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &md);
- if (IS_ERR(*inode)) {
-#ifdef CONFIG_FS_POSIX_ACL
- if (md.posix_acl) {
- posix_acl_release(md.posix_acl);
- md.posix_acl = NULL;
- }
-#endif
- rc = PTR_ERR(*inode);
- CERROR("new_inode -fatal: rc %d\n", rc);
- goto out;
- }
- }
-
- /* Handling piggyback layout lock.
- * Layout lock can be piggybacked by getattr and open request.
- * The lsm can be applied to inode only if it comes with a layout lock
- * otherwise correct layout may be overwritten, for example:
- * 1. proc1: mdt returns a lsm but not granting layout
- * 2. layout was changed by another client
- * 3. proc2: refresh layout and layout lock granted
- * 4. proc1: to apply a stale layout
- */
- if (it && it->it_lock_mode != 0) {
- struct lustre_handle lockh;
- struct ldlm_lock *lock;
-
- lockh.cookie = it->it_lock_handle;
- lock = ldlm_handle2lock(&lockh);
- LASSERT(lock);
- if (ldlm_has_layout(lock)) {
- struct cl_object_conf conf;
-
- memset(&conf, 0, sizeof(conf));
- conf.coc_opc = OBJECT_CONF_SET;
- conf.coc_inode = *inode;
- conf.coc_lock = lock;
- conf.u.coc_layout = md.layout;
- (void)ll_layout_conf(*inode, &conf);
- }
- LDLM_LOCK_PUT(lock);
- }
-
-out:
- md_free_lustre_md(sbi->ll_md_exp, &md);
-cleanup:
- if (rc != 0 && it && it->it_op & IT_OPEN)
- ll_open_cleanup(sb ? sb : (*inode)->i_sb, req);
-
- return rc;
-}
-
-int ll_obd_statfs(struct inode *inode, void __user *arg)
-{
- struct ll_sb_info *sbi = NULL;
- struct obd_export *exp;
- char *buf = NULL;
- struct obd_ioctl_data *data = NULL;
- __u32 type;
- int len = 0, rc;
-
- if (!inode) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- sbi = ll_i2sbi(inode);
- if (!sbi) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- rc = obd_ioctl_getdata(&buf, &len, arg);
- if (rc)
- goto out_statfs;
-
- data = (void *)buf;
- if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
- !data->ioc_pbuf1 || !data->ioc_pbuf2) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- if (data->ioc_inllen1 != sizeof(__u32) ||
- data->ioc_inllen2 != sizeof(__u32) ||
- data->ioc_plen1 != sizeof(struct obd_statfs) ||
- data->ioc_plen2 != sizeof(struct obd_uuid)) {
- rc = -EINVAL;
- goto out_statfs;
- }
-
- memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
- if (type & LL_STATFS_LMV) {
- exp = sbi->ll_md_exp;
- } else if (type & LL_STATFS_LOV) {
- exp = sbi->ll_dt_exp;
- } else {
- rc = -ENODEV;
- goto out_statfs;
- }
-
- rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL);
- if (rc)
- goto out_statfs;
-out_statfs:
- kvfree(buf);
- return rc;
-}
-
-int ll_process_config(struct lustre_cfg *lcfg)
-{
- char *ptr;
- void *sb;
- struct lprocfs_static_vars lvars;
- unsigned long x;
- int rc = 0;
-
- lprocfs_llite_init_vars(&lvars);
-
- /* The instance name contains the sb: lustre-client-aacfe000 */
- ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
- if (!ptr || !*(++ptr))
- return -EINVAL;
- rc = kstrtoul(ptr, 16, &x);
- if (rc != 0)
- return -EINVAL;
- sb = (void *)x;
- /* This better be a real Lustre superblock! */
- LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic ==
- LMD_MAGIC);
-
- /* Note we have not called client_common_fill_super yet, so
- * proc fns must be able to handle that!
- */
- rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
- lcfg, sb);
- if (rc > 0)
- rc = 0;
- return rc;
-}
-
-/* this function prepares md_op_data hint for passing ot down to MD stack. */
-struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
- struct inode *i1, struct inode *i2,
- const char *name, size_t namelen,
- u32 mode, __u32 opc, void *data)
-{
- if (!name) {
- /* Do not reuse namelen for something else. */
- if (namelen)
- return ERR_PTR(-EINVAL);
- } else {
- if (namelen > ll_i2sbi(i1)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- if (!lu_name_is_valid_2(name, namelen))
- return ERR_PTR(-EINVAL);
- }
-
- if (!op_data)
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
-
- if (!op_data)
- return ERR_PTR(-ENOMEM);
-
- ll_i2gids(op_data->op_suppgids, i1, i2);
- op_data->op_fid1 = *ll_inode2fid(i1);
- op_data->op_default_stripe_offset = -1;
- if (S_ISDIR(i1->i_mode)) {
- op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
- if (opc == LUSTRE_OPC_MKDIR)
- op_data->op_default_stripe_offset =
- ll_i2info(i1)->lli_def_stripe_offset;
- }
-
- if (i2) {
- op_data->op_fid2 = *ll_inode2fid(i2);
- if (S_ISDIR(i2->i_mode))
- op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
- } else {
- fid_zero(&op_data->op_fid2);
- }
-
- if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
- op_data->op_cli_flags |= CLI_HASH64;
-
- if (ll_need_32bit_api(ll_i2sbi(i1)))
- op_data->op_cli_flags |= CLI_API32;
-
- op_data->op_name = name;
- op_data->op_namelen = namelen;
- op_data->op_mode = mode;
- op_data->op_mod_time = ktime_get_real_seconds();
- op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
- op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = cfs_curproc_cap_pack();
- if ((opc == LUSTRE_OPC_CREATE) && name &&
- filename_is_volatile(name, namelen, &op_data->op_mds))
- op_data->op_bias |= MDS_CREATE_VOLATILE;
- else
- op_data->op_mds = 0;
- op_data->op_data = data;
-
- return op_data;
-}
-
-void ll_finish_md_op_data(struct md_op_data *op_data)
-{
- kfree(op_data);
-}
-
-int ll_show_options(struct seq_file *seq, struct dentry *dentry)
-{
- struct ll_sb_info *sbi;
-
- LASSERT(seq && dentry);
- sbi = ll_s2sbi(dentry->d_sb);
-
- if (sbi->ll_flags & LL_SBI_NOLCK)
- seq_puts(seq, ",nolock");
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- seq_puts(seq, ",flock");
-
- if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- seq_puts(seq, ",localflock");
-
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- seq_puts(seq, ",user_xattr");
-
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
- seq_puts(seq, ",lazystatfs");
-
- if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
- seq_puts(seq, ",user_fid2path");
-
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- seq_puts(seq, ",always_ping");
-
- return 0;
-}
-
-/**
- * Get obd name by cmd, and copy out to user space
- */
-int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_device *obd;
-
- if (cmd == OBD_IOC_GETDTNAME)
- obd = class_exp2obd(sbi->ll_dt_exp);
- else if (cmd == OBD_IOC_GETMDNAME)
- obd = class_exp2obd(sbi->ll_md_exp);
- else
- return -EINVAL;
-
- if (!obd)
- return -ENOENT;
-
- if (copy_to_user((void __user *)arg, obd->obd_name,
- strlen(obd->obd_name) + 1))
- return -EFAULT;
-
- return 0;
-}
-
-/**
- * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the
- * fsname will be returned in this buffer; otherwise, a static buffer will be
- * used to store the fsname and returned to caller.
- */
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
-{
- static char fsname_static[MTI_NAME_MAXLEN];
- struct lustre_sb_info *lsi = s2lsi(sb);
- char *ptr;
- int len;
-
- if (!buf) {
- /* this means the caller wants to use static buffer
- * and it doesn't care about race. Usually this is
- * in error reporting path
- */
- buf = fsname_static;
- buflen = sizeof(fsname_static);
- }
-
- len = strlen(lsi->lsi_lmd->lmd_profile);
- ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
- if (ptr && (strcmp(ptr, "-client") == 0))
- len -= 7;
-
- if (unlikely(len >= buflen))
- len = buflen - 1;
- strncpy(buf, lsi->lsi_lmd->lmd_profile, len);
- buf[len] = '\0';
-
- return buf;
-}
-
-void ll_dirty_page_discard_warn(struct page *page, int ioret)
-{
- char *buf, *path = NULL;
- struct dentry *dentry = NULL;
- struct vvp_object *obj = cl_inode2vvp(page->mapping->host);
-
- /* this can be called inside spin lock so use GFP_ATOMIC. */
- buf = (char *)__get_free_page(GFP_ATOMIC);
- if (buf) {
- dentry = d_find_alias(page->mapping->host);
- if (dentry)
- path = dentry_path_raw(dentry, buf, PAGE_SIZE);
- }
-
- CDEBUG(D_WARNING,
- "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
- ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
- s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
- PFID(&obj->vob_header.coh_lu.loh_fid),
- (path && !IS_ERR(path)) ? path : "", ioret);
-
- if (dentry)
- dput(dentry);
-
- if (buf)
- free_page((unsigned long)buf);
-}
-
-ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
- struct lov_user_md **kbuf)
-{
- struct lov_user_md lum;
- ssize_t lum_size;
-
- if (copy_from_user(&lum, md, sizeof(lum))) {
- lum_size = -EFAULT;
- goto no_kbuf;
- }
-
- lum_size = ll_lov_user_md_size(&lum);
- if (lum_size < 0)
- goto no_kbuf;
-
- *kbuf = kzalloc(lum_size, GFP_NOFS);
- if (!*kbuf) {
- lum_size = -ENOMEM;
- goto no_kbuf;
- }
-
- if (copy_from_user(*kbuf, md, lum_size) != 0) {
- kfree(*kbuf);
- *kbuf = NULL;
- lum_size = -EFAULT;
- }
-no_kbuf:
- return lum_size;
-}
-
-/*
- * Compute llite root squash state after a change of root squash
- * configuration setting or add/remove of a lnet nid
- */
-void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
-{
- struct root_squash_info *squash = &sbi->ll_squash;
- struct lnet_process_id id;
- bool matched;
- int i;
-
- /* Update norootsquash flag */
- down_write(&squash->rsi_sem);
- if (list_empty(&squash->rsi_nosquash_nids)) {
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
- } else {
- /*
- * Do not apply root squash as soon as one of our NIDs is
- * in the nosquash_nids list
- */
- matched = false;
- i = 0;
-
- while (LNetGetId(i++, &id) != -ENOENT) {
- if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
- continue;
- if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
- matched = true;
- break;
- }
- }
- if (matched)
- sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
- else
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
- }
- up_write(&squash->rsi_sem);
-}
-
-/**
- * Parse linkea content to extract information about a given hardlink
- *
- * \param[in] ldata - Initialized linkea data
- * \param[in] linkno - Link identifier
- * \param[out] parent_fid - The entry's parent FID
- * \param[in] size - Entry name destination buffer
- *
- * \retval 0 on success
- * \retval Appropriate negative error code on failure
- */
-static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
- struct lu_fid *parent_fid, struct lu_name *ln)
-{
- unsigned int idx;
- int rc;
-
- rc = linkea_init_with_rec(ldata);
- if (rc < 0)
- return rc;
-
- if (linkno >= ldata->ld_leh->leh_reccount)
- /* beyond last link */
- return -ENODATA;
-
- linkea_first_entry(ldata);
- for (idx = 0; ldata->ld_lee; idx++) {
- linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
- parent_fid);
- if (idx == linkno)
- break;
-
- linkea_next_entry(ldata);
- }
-
- if (idx < linkno)
- return -ENODATA;
-
- return 0;
-}
-
-/**
- * Get parent FID and name of an identified link. Operation is performed for
- * a given link number, letting the caller iterate over linkno to list one or
- * all links of an entry.
- *
- * \param[in] file - File descriptor against which to perform the operation
- * \param[in,out] arg - User-filled structure containing the linkno to operate
- * on and the available size. It is eventually filled
- * with the requested information or left untouched on
- * error
- *
- * \retval - 0 on success
- * \retval - Appropriate negative error code on failure
- */
-int ll_getparent(struct file *file, struct getparent __user *arg)
-{
- struct inode *inode = file_inode(file);
- struct linkea_data *ldata;
- struct lu_fid parent_fid;
- struct lu_buf buf = {
- .lb_buf = NULL,
- .lb_len = 0
- };
- struct lu_name ln;
- u32 name_size;
- u32 linkno;
- int rc;
-
- if (!capable(CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
- return -EPERM;
-
- if (get_user(name_size, &arg->gp_name_size))
- return -EFAULT;
-
- if (get_user(linkno, &arg->gp_linkno))
- return -EFAULT;
-
- if (name_size > PATH_MAX)
- return -EINVAL;
-
- ldata = kzalloc(sizeof(*ldata), GFP_NOFS);
- if (!ldata)
- return -ENOMEM;
-
- rc = linkea_data_new(ldata, &buf);
- if (rc < 0)
- goto ldata_free;
-
- rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf,
- buf.lb_len, OBD_MD_FLXATTR);
- if (rc < 0)
- goto lb_free;
-
- rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
- if (rc < 0)
- goto lb_free;
-
- if (ln.ln_namelen >= name_size) {
- rc = -EOVERFLOW;
- goto lb_free;
- }
-
- if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) {
- rc = -EFAULT;
- goto lb_free;
- }
-
- if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) {
- rc = -EFAULT;
- goto lb_free;
- }
-
- if (put_user('\0', arg->gp_name + ln.ln_namelen)) {
- rc = -EFAULT;
- goto lb_free;
- }
-
-lb_free:
- kvfree(buf.lb_buf);
-ldata_free:
- kfree(ldata);
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
deleted file mode 100644
index 214b07554e62..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ /dev/null
@@ -1,478 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-static const struct vm_operations_struct ll_file_vm_ops;
-
-void policy_from_vma(union ldlm_policy_data *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-{
- policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) +
- (vma->vm_pgoff << PAGE_SHIFT);
- policy->l_extent.end = (policy->l_extent.start + count - 1) |
- ~PAGE_MASK;
-}
-
-struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
- size_t count)
-{
- struct vm_area_struct *vma, *ret = NULL;
-
- /* mmap_sem must have been held by caller. */
- LASSERT(!down_write_trylock(&mm->mmap_sem));
-
- for (vma = find_vma(mm, addr);
- vma && vma->vm_start < (addr + count); vma = vma->vm_next) {
- if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
- }
- }
- return ret;
-}
-
-/**
- * API independent part for page fault initialization.
- * \param vma - virtual memory area addressed to page fault
- * \param env - corespondent lu_env to processing
- * \param index - page index corespondent to fault.
- * \parm ra_flags - vma readahead flags.
- *
- * \return error codes from cl_io_init.
- */
-static struct cl_io *
-ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
- pgoff_t index, unsigned long *ra_flags)
-{
- struct file *file = vma->vm_file;
- struct inode *inode = file_inode(file);
- struct cl_io *io;
- struct cl_fault_io *fio;
- int rc;
-
- if (ll_file_nolock(file))
- return ERR_PTR(-EOPNOTSUPP);
-
-restart:
- io = vvp_env_thread_io(env);
- io->ci_obj = ll_i2info(inode)->lli_clob;
- LASSERT(io->ci_obj);
-
- fio = &io->u.ci_fault;
- fio->ft_index = index;
- fio->ft_executable = vma->vm_flags & VM_EXEC;
-
- /*
- * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
- if (ra_flags)
- *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
- CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
- fio->ft_index, fio->ft_executable);
-
- rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
- if (rc == 0) {
- struct vvp_io *vio = vvp_env_io(env);
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-
- LASSERT(vio->vui_cl.cis_io == io);
-
- /* mmap lock must be MANDATORY it has to cache pages. */
- io->ci_lockreq = CILR_MANDATORY;
- vio->vui_fd = fd;
- } else {
- LASSERT(rc < 0);
- cl_io_fini(env, io);
- if (io->ci_need_restart)
- goto restart;
-
- io = ERR_PTR(rc);
- }
-
- return io;
-}
-
-/* Sharing code of page_mkwrite method for rhel5 and rhel6 */
-static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
- bool *retry)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio;
- int result;
- u16 refcheck;
- sigset_t set;
- struct inode *inode;
- struct ll_inode_info *lli;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ll_fault_io_init(env, vma, vmpage->index, NULL);
- if (IS_ERR(io)) {
- result = PTR_ERR(io);
- goto out;
- }
-
- result = io->ci_result;
- if (result < 0)
- goto out_io;
-
- io->u.ci_fault.ft_mkwrite = 1;
- io->u.ci_fault.ft_writable = 1;
-
- vio = vvp_env_io(env);
- vio->u.fault.ft_vma = vma;
- vio->u.fault.ft_vmpage = vmpage;
-
- cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &set);
-
- inode = vvp_object_inode(io->ci_obj);
- lli = ll_i2info(inode);
-
- result = cl_io_loop(env, io);
-
- cfs_restore_sigs(&set);
-
- if (result == 0) {
- struct inode *inode = file_inode(vma->vm_file);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- lock_page(vmpage);
- if (!vmpage->mapping) {
- unlock_page(vmpage);
-
- /* page was truncated and lock was cancelled, return
- * ENODATA so that VM_FAULT_NOPAGE will be returned
- * to handle_mm_fault().
- */
- if (result == 0)
- result = -ENODATA;
- } else if (!PageDirty(vmpage)) {
- /* race, the page has been cleaned by ptlrpcd after
- * it was unlocked, it has to be added into dirty
- * cache again otherwise this soon-to-dirty page won't
- * consume any grants, even worse if this page is being
- * transferred because it will break RPC checksum.
- */
- unlock_page(vmpage);
-
- CDEBUG(D_MMAP,
- "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
- vmpage, vmpage->index);
-
- *retry = true;
- result = -EAGAIN;
- }
-
- if (!result)
- set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
- }
-
-out_io:
- cl_io_fini(env, io);
-out:
- cl_env_put(env, &refcheck);
- CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
- LASSERT(ergo(result == 0, PageLocked(vmpage)));
-
- return result;
-}
-
-static inline int to_fault_error(int result)
-{
- switch (result) {
- case 0:
- result = VM_FAULT_LOCKED;
- break;
- case -EFAULT:
- result = VM_FAULT_NOPAGE;
- break;
- case -ENOMEM:
- result = VM_FAULT_OOM;
- break;
- default:
- result = VM_FAULT_SIGBUS;
- break;
- }
- return result;
-}
-
-/**
- * Lustre implementation of a vm_operations_struct::fault() method, called by
- * VM to server page fault (both in kernel and user space).
- *
- * \param vma - is virtual area struct related to page fault
- * \param vmf - structure which describe type and address where hit fault
- *
- * \return allocated and filled _locked_ page for address
- * \retval VM_FAULT_ERROR on general error
- * \retval NOPAGE_OOM not have memory for allocate new page
- */
-static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio = NULL;
- struct page *vmpage;
- unsigned long ra_flags;
- int result = 0;
- int fault_ret = 0;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
- if (IS_ERR(io)) {
- result = to_fault_error(PTR_ERR(io));
- goto out;
- }
-
- result = io->ci_result;
- if (result == 0) {
- vio = vvp_env_io(env);
- vio->u.fault.ft_vma = vma;
- vio->u.fault.ft_vmpage = NULL;
- vio->u.fault.ft_vmf = vmf;
- vio->u.fault.ft_flags = 0;
- vio->u.fault.ft_flags_valid = false;
-
- /* May call ll_readpage() */
- ll_cl_add(vma->vm_file, env, io);
-
- result = cl_io_loop(env, io);
-
- ll_cl_remove(vma->vm_file, env);
-
- /* ft_flags are only valid if we reached
- * the call to filemap_fault
- */
- if (vio->u.fault.ft_flags_valid)
- fault_ret = vio->u.fault.ft_flags;
-
- vmpage = vio->u.fault.ft_vmpage;
- if (result != 0 && vmpage) {
- put_page(vmpage);
- vmf->page = NULL;
- }
- }
- cl_io_fini(env, io);
-
- vma->vm_flags |= ra_flags;
-
-out:
- cl_env_put(env, &refcheck);
- if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
- fault_ret |= to_fault_error(result);
-
- CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
- return fault_ret;
-}
-
-static int ll_fault(struct vm_fault *vmf)
-{
- int count = 0;
- bool printed = false;
- int result;
- sigset_t set;
-
- /* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite
- * so that it can be killed by admin but not cause segfault by
- * other signals.
- */
- cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM), &set);
-
-restart:
- result = ll_fault0(vmf->vma, vmf);
- LASSERT(!(result & VM_FAULT_LOCKED));
- if (result == 0) {
- struct page *vmpage = vmf->page;
-
- /* check if this page has been truncated */
- lock_page(vmpage);
- if (unlikely(!vmpage->mapping)) { /* unlucky */
- unlock_page(vmpage);
- put_page(vmpage);
- vmf->page = NULL;
-
- if (!printed && ++count > 16) {
- CWARN("the page is under heavy contention, maybe your app(%s) needs revising :-)\n",
- current->comm);
- printed = true;
- }
-
- goto restart;
- }
-
- result = VM_FAULT_LOCKED;
- }
- cfs_restore_sigs(&set);
- return result;
-}
-
-static int ll_page_mkwrite(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
- int count = 0;
- bool printed = false;
- bool retry;
- int result;
-
- file_update_time(vma->vm_file);
- do {
- retry = false;
- result = ll_page_mkwrite0(vma, vmf->page, &retry);
-
- if (!printed && ++count > 16) {
- const struct dentry *de = vma->vm_file->f_path.dentry;
-
- CWARN("app(%s): the page %lu of file " DFID " is under heavy contention\n",
- current->comm, vmf->pgoff,
- PFID(ll_inode2fid(de->d_inode)));
- printed = true;
- }
- } while (retry);
-
- switch (result) {
- case 0:
- LASSERT(PageLocked(vmf->page));
- result = VM_FAULT_LOCKED;
- break;
- case -ENODATA:
- case -EAGAIN:
- case -EFAULT:
- result = VM_FAULT_NOPAGE;
- break;
- case -ENOMEM:
- result = VM_FAULT_OOM;
- break;
- default:
- result = VM_FAULT_SIGBUS;
- break;
- }
-
- return result;
-}
-
-/**
- * To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count in vvp_object::vob_mmap_cnt.
- */
-static void ll_vm_open(struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct vvp_object *vob = cl_inode2vvp(inode);
-
- LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
- atomic_inc(&vob->vob_mmap_cnt);
-}
-
-/**
- * Dual to ll_vm_open().
- */
-static void ll_vm_close(struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct vvp_object *vob = cl_inode2vvp(inode);
-
- atomic_dec(&vob->vob_mmap_cnt);
- LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
-}
-
-/* XXX put nice comment here. talk about __free_pte -> dirty pages and
- * nopage's reference passing to the pte
- */
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last)
-{
- int rc = -ENOENT;
-
- LASSERTF(last > first, "last %llu first %llu\n", last, first);
- if (mapping_mapped(mapping)) {
- rc = 0;
- unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
- last - first + 1, 0);
- }
-
- return rc;
-}
-
-static const struct vm_operations_struct ll_file_vm_ops = {
- .fault = ll_fault,
- .page_mkwrite = ll_page_mkwrite,
- .open = ll_vm_open,
- .close = ll_vm_close,
-};
-
-int ll_file_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct inode *inode = file_inode(file);
- int rc;
-
- if (ll_file_nolock(file))
- return -EOPNOTSUPP;
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
- vma->vm_ops = &ll_file_vm_ops;
- vma->vm_ops->open(vma);
- /* update the inode's size and mtime */
- rc = ll_glimpse_size(inode);
- }
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
deleted file mode 100644
index a6a1d80c711a..000000000000
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ /dev/null
@@ -1,375 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lustre/llite/llite_nfs.c
- *
- * NFS export of Lustre Light File System
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- * Author: Huang Hua <huanghua@clusterfs.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include "llite_internal.h"
-#include <linux/exportfs.h>
-
-__u32 get_uuid2int(const char *name, int len)
-{
- __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
-
- while (len--) {
- __u32 key = key1 + (key0 ^ (*name++ * 7152373));
-
- if (key & 0x80000000)
- key -= 0x7fffffff;
- key1 = key0;
- key0 = key;
- }
- return (key0 << 1);
-}
-
-void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid)
-{
- __u64 key = 0, key0 = 0x12a3fe2d, key1 = 0x37abe8f9;
-
- while (len--) {
- key = key1 + (key0 ^ (*name++ * 7152373));
- if (key & 0x8000000000000000ULL)
- key -= 0x7fffffffffffffffULL;
- key1 = key0;
- key0 = key;
- }
-
- fsid->val[0] = key;
- fsid->val[1] = key >> 32;
-}
-
-struct inode *search_inode_for_lustre(struct super_block *sb,
- const struct lu_fid *fid)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct ptlrpc_request *req = NULL;
- struct inode *inode = NULL;
- int eadatalen = 0;
- unsigned long hash = cl_fid_build_ino(fid,
- ll_need_32bit_api(sbi));
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_INFO, "searching inode for:(%lu," DFID ")\n", hash, PFID(fid));
-
- inode = ilookup5(sb, hash, ll_test_inode_by_fid, (void *)fid);
- if (inode)
- return inode;
-
- rc = ll_get_default_mdsize(sbi, &eadatalen);
- if (rc)
- return ERR_PTR(rc);
-
- /* Because inode is NULL, ll_prep_md_op_data can not
- * be used here. So we allocate op_data ourselves
- */
- op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
- if (!op_data)
- return ERR_PTR(-ENOMEM);
-
- op_data->op_fid1 = *fid;
- op_data->op_mode = eadatalen;
- op_data->op_valid = OBD_MD_FLEASIZE;
-
- /* mds_fid2dentry ignores f_type */
- rc = md_getattr(sbi->ll_md_exp, op_data, &req);
- kfree(op_data);
- if (rc) {
- CDEBUG(D_INFO, "can't get object attrs, fid " DFID ", rc %d\n",
- PFID(fid), rc);
- return ERR_PTR(rc);
- }
- rc = ll_prep_inode(&inode, req, sb, NULL);
- ptlrpc_req_finished(req);
- if (rc)
- return ERR_PTR(rc);
-
- return inode;
-}
-
-struct lustre_nfs_fid {
- struct lu_fid lnf_child;
- struct lu_fid lnf_parent;
-};
-
-static struct dentry *
-ll_iget_for_nfs(struct super_block *sb,
- struct lu_fid *fid, struct lu_fid *parent)
-{
- struct inode *inode;
- struct dentry *result;
-
- if (!fid_is_sane(fid))
- return ERR_PTR(-ESTALE);
-
- CDEBUG(D_INFO, "Get dentry for fid: " DFID "\n", PFID(fid));
-
- inode = search_inode_for_lustre(sb, fid);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- if (is_bad_inode(inode)) {
- /* we didn't find the right inode.. */
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
-
- result = d_obtain_alias(inode);
- if (IS_ERR(result)) {
- iput(inode);
- return result;
- }
-
- /**
- * In case d_obtain_alias() found a disconnected dentry, always update
- * lli_pfid to allow later operation (normally open) have parent fid,
- * which may be used by MDS to create data.
- */
- if (parent) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- lli->lli_pfid = *parent;
- spin_unlock(&lli->lli_lock);
- }
-
- /* N.B. d_obtain_alias() drops inode ref on error */
- result = d_obtain_alias(inode);
- if (!IS_ERR(result)) {
- /*
- * Need to signal to the ll_intent_file_open that
- * we came from NFS and so opencache needs to be
- * enabled for this one
- */
- ll_d2d(result)->lld_nfs_dentry = 1;
- }
-
- return result;
-}
-
-/**
- * \a connectable - is nfsd will connect himself or this should be done
- * at lustre
- *
- * The return value is file handle type:
- * 1 -- contains child file handle;
- * 2 -- contains child file handle and parent file handle;
- * 255 -- error.
- */
-static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
- struct inode *parent)
-{
- int fileid_len = sizeof(struct lustre_nfs_fid) / 4;
- struct lustre_nfs_fid *nfs_fid = (void *)fh;
-
- CDEBUG(D_INFO, "%s: encoding for (" DFID ") maxlen=%d minlen=%d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), *plen, fileid_len);
-
- if (*plen < fileid_len) {
- *plen = fileid_len;
- return FILEID_INVALID;
- }
-
- nfs_fid->lnf_child = *ll_inode2fid(inode);
- if (parent)
- nfs_fid->lnf_parent = *ll_inode2fid(parent);
- else
- fid_zero(&nfs_fid->lnf_parent);
- *plen = fileid_len;
-
- return FILEID_LUSTRE;
-}
-
-static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
- int namelen, loff_t hash, u64 ino,
- unsigned int type)
-{
- /* It is hack to access lde_fid for comparison with lgd_fid.
- * So the input 'name' must be part of the 'lu_dirent'.
- */
- struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name);
- struct ll_getname_data *lgd =
- container_of(ctx, struct ll_getname_data, ctx);
- struct lu_fid fid;
-
- fid_le_to_cpu(&fid, &lde->lde_fid);
- if (lu_fid_eq(&fid, &lgd->lgd_fid)) {
- memcpy(lgd->lgd_name, name, namelen);
- lgd->lgd_name[namelen] = 0;
- lgd->lgd_found = 1;
- }
- return lgd->lgd_found;
-}
-
-static int ll_get_name(struct dentry *dentry, char *name,
- struct dentry *child)
-{
- struct inode *dir = d_inode(dentry);
- int rc;
- struct ll_getname_data lgd = {
- .lgd_name = name,
- .lgd_fid = ll_i2info(d_inode(child))->lli_fid,
- .ctx.actor = ll_nfs_get_name_filldir,
- };
- struct md_op_data *op_data;
- __u64 pos = 0;
-
- if (!dir || !S_ISDIR(dir->i_mode)) {
- rc = -ENOTDIR;
- goto out;
- }
-
- if (!dir->i_fop) {
- rc = -EINVAL;
- goto out;
- }
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
- inode_lock(dir);
- rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx);
- inode_unlock(dir);
- ll_finish_md_op_data(op_data);
- if (!rc && !lgd.lgd_found)
- rc = -ENOENT;
-out:
- return rc;
-}
-
-static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
- if (fh_type != FILEID_LUSTRE)
- return ERR_PTR(-EPROTO);
-
- return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent);
-}
-
-static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type)
-{
- struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
- if (fh_type != FILEID_LUSTRE)
- return ERR_PTR(-EPROTO);
-
- return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
-}
-
-int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid)
-{
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi;
- struct mdt_body *body;
- static const char dotdot[] = "..";
- struct md_op_data *op_data;
- int rc;
- int lmmsize;
-
- LASSERT(dir && S_ISDIR(dir->i_mode));
-
- sbi = ll_s2sbi(dir->i_sb);
-
- CDEBUG(D_INFO, "%s: getting parent for (" DFID ")\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)));
-
- rc = ll_get_default_mdsize(sbi, &lmmsize);
- if (rc != 0)
- return rc;
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
- strlen(dotdot), lmmsize,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
- ll_finish_md_op_data(op_data);
- if (rc) {
- CERROR("%s: failure inode " DFID " get parent: rc = %d\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)), rc);
- return rc;
- }
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- /*
- * LU-3952: MDT may lost the FID of its parent, we should not crash
- * the NFS server, ll_iget_for_nfs() will handle the error.
- */
- if (body->mbo_valid & OBD_MD_FLID) {
- CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
- PFID(ll_inode2fid(dir)), PFID(&body->mbo_fid1));
- *parent_fid = body->mbo_fid1;
- }
-
- ptlrpc_req_finished(req);
- return 0;
-}
-
-static struct dentry *ll_get_parent(struct dentry *dchild)
-{
- struct lu_fid parent_fid = { 0 };
- struct dentry *dentry;
- int rc;
-
- rc = ll_dir_get_parent_fid(dchild->d_inode, &parent_fid);
- if (rc)
- return ERR_PTR(rc);
-
- dentry = ll_iget_for_nfs(dchild->d_inode->i_sb, &parent_fid, NULL);
-
- return dentry;
-}
-
-const struct export_operations lustre_export_operations = {
- .get_parent = ll_get_parent,
- .encode_fh = ll_encode_fh,
- .get_name = ll_get_name,
- .fh_to_dentry = ll_fh_to_dentry,
- .fh_to_parent = ll_fh_to_parent,
-};
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
deleted file mode 100644
index 644bea2f9d37..000000000000
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ /dev/null
@@ -1,1684 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <lprocfs_status.h>
-#include <linux/seq_file.h>
-#include <obd_support.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/* debugfs llite mount point registration */
-static const struct file_operations ll_rw_extents_stats_fops;
-static const struct file_operations ll_rw_extents_stats_pp_fops;
-static const struct file_operations ll_rw_offset_stats_fops;
-
-static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%u\n", osfs.os_bsize);
-
- return rc;
-}
-LUSTRE_RO_ATTR(blocksize);
-
-static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_blocks;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytestotal);
-
-static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bfree;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesfree);
-
-static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc) {
- __u32 blk_size = osfs.os_bsize >> 10;
- __u64 result = osfs.os_bavail;
-
- while (blk_size >>= 1)
- result <<= 1;
-
- rc = sprintf(buf, "%llu\n", result);
- }
-
- return rc;
-}
-LUSTRE_RO_ATTR(kbytesavail);
-
-static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_files);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filestotal);
-
-static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- struct obd_statfs osfs;
- int rc;
-
- rc = ll_statfs_internal(sbi->ll_sb, &osfs,
- cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
- OBD_STATFS_NODELAY);
- if (!rc)
- return sprintf(buf, "%llu\n", osfs.os_ffree);
-
- return rc;
-}
-LUSTRE_RO_ATTR(filesfree);
-
-static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- return sprintf(buf, "local client\n");
-}
-LUSTRE_RO_ATTR(client_type);
-
-static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s\n", sbi->ll_sb->s_type->name);
-}
-LUSTRE_RO_ATTR(fstype);
-
-static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%s\n", sbi->ll_sb_uuid.uuid);
-}
-LUSTRE_RO_ATTR(uuid);
-
-static int ll_site_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
-
- /*
- * See description of statistical counters in struct cl_site, and
- * struct lu_site.
- */
- return cl_site_stats_print(lu2cl_site(ll_s2sbi(sb)->ll_site), m);
-}
-
-LPROC_SEQ_FOPS_RO(ll_site_stats);
-
-static ssize_t max_read_ahead_mb_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_pages;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */
-
- if (pages_number > totalram_pages / 2) {
- CERROR("can't set file readahead more than %lu MB\n",
- totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_pages = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_mb);
-
-static ssize_t max_read_ahead_per_file_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_pages_per_file;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_per_file_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- if (pages_number > sbi->ll_ra_info.ra_max_pages) {
- CERROR("can't set file readahead more than max_read_ahead_mb %lu MB\n",
- sbi->ll_ra_info.ra_max_pages);
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_pages_per_file = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_per_file_mb);
-
-static ssize_t max_read_ahead_whole_mb_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- long pages_number;
- int mult;
-
- spin_lock(&sbi->ll_lock);
- pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
- spin_unlock(&sbi->ll_lock);
-
- mult = 1 << (20 - PAGE_SHIFT);
- return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult);
-}
-
-static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pages_number;
-
- rc = kstrtoul(buffer, 10, &pages_number);
- if (rc)
- return rc;
-
- /* Cap this at the current max readahead window size, the readahead
- * algorithm does this anyway so it's pointless to set it larger.
- */
- if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) {
- CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n",
- sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_SHIFT));
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- sbi->ll_ra_info.ra_max_read_ahead_whole_pages = pages_number;
- spin_unlock(&sbi->ll_lock);
-
- return count;
-}
-LUSTRE_RW_ATTR(max_read_ahead_whole_mb);
-
-static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- int shift = 20 - PAGE_SHIFT;
- long max_cached_mb;
- long unused_mb;
-
- max_cached_mb = cache->ccc_lru_max >> shift;
- unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift;
- seq_printf(m,
- "users: %d\n"
- "max_cached_mb: %ld\n"
- "used_mb: %ld\n"
- "unused_mb: %ld\n"
- "reclaim_count: %u\n",
- atomic_read(&cache->ccc_users),
- max_cached_mb,
- max_cached_mb - unused_mb,
- unused_mb,
- cache->ccc_lru_shrinkers);
- return 0;
-}
-
-static ssize_t ll_max_cached_mb_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct super_block *sb = ((struct seq_file *)file->private_data)->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- struct lu_env *env;
- long diff = 0;
- long nrpages = 0;
- u16 refcheck;
- long pages_number;
- int mult;
- long rc;
- u64 val;
- char kernbuf[128];
-
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- mult = 1 << (20 - PAGE_SHIFT);
- buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
- kernbuf;
- rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
- if (rc)
- return rc;
-
- if (val > LONG_MAX)
- return -ERANGE;
- pages_number = (long)val;
-
- if (pages_number < 0 || pages_number > totalram_pages) {
- CERROR("%s: can't set max cache more than %lu MB\n",
- ll_get_fsname(sb, NULL, 0),
- totalram_pages >> (20 - PAGE_SHIFT));
- return -ERANGE;
- }
-
- spin_lock(&sbi->ll_lock);
- diff = pages_number - cache->ccc_lru_max;
- spin_unlock(&sbi->ll_lock);
-
- /* easy - add more LRU slots. */
- if (diff >= 0) {
- atomic_long_add(diff, &cache->ccc_lru_left);
- rc = 0;
- goto out;
- }
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return 0;
-
- diff = -diff;
- while (diff > 0) {
- long tmp;
-
- /* reduce LRU budget from free slots. */
- do {
- long ov, nv;
-
- ov = atomic_long_read(&cache->ccc_lru_left);
- if (ov == 0)
- break;
-
- nv = ov > diff ? ov - diff : 0;
- rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv);
- if (likely(ov == rc)) {
- diff -= ov - nv;
- nrpages += ov - nv;
- break;
- }
- } while (1);
-
- if (diff <= 0)
- break;
-
- if (!sbi->ll_dt_exp) { /* being initialized */
- rc = 0;
- goto out;
- }
-
- /* difficult - have to ask OSCs to drop LRU slots. */
- tmp = diff << 1;
- rc = obd_set_info_async(env, sbi->ll_dt_exp,
- sizeof(KEY_CACHE_LRU_SHRINK),
- KEY_CACHE_LRU_SHRINK,
- sizeof(tmp), &tmp, NULL);
- if (rc < 0)
- break;
- }
- cl_env_put(env, &refcheck);
-
-out:
- if (rc >= 0) {
- spin_lock(&sbi->ll_lock);
- cache->ccc_lru_max = pages_number;
- spin_unlock(&sbi->ll_lock);
- rc = count;
- } else {
- atomic_long_add(nrpages, &cache->ccc_lru_left);
- }
- return rc;
-}
-
-LPROC_SEQ_FOPS(ll_max_cached_mb);
-
-static ssize_t checksum_pages_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", (sbi->ll_flags & LL_SBI_CHECKSUM) ? 1 : 0);
-}
-
-static ssize_t checksum_pages_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- if (!sbi->ll_dt_exp)
- /* Not set up yet */
- return -EAGAIN;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
- if (val)
- sbi->ll_flags |= LL_SBI_CHECKSUM;
- else
- sbi->ll_flags &= ~LL_SBI_CHECKSUM;
-
- rc = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
- KEY_CHECKSUM, sizeof(val), &val, NULL);
- if (rc)
- CWARN("Failed to set OSC checksum flags: %d\n", rc);
-
- return count;
-}
-LUSTRE_RW_ATTR(checksum_pages);
-
-static ssize_t ll_rd_track_id(struct kobject *kobj, char *buf,
- enum stats_track_type type)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- if (sbi->ll_stats_track_type == type)
- return sprintf(buf, "%d\n", sbi->ll_stats_track_id);
- else if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
- return sprintf(buf, "0 (all)\n");
- else
- return sprintf(buf, "untracked\n");
-}
-
-static ssize_t ll_wr_track_id(struct kobject *kobj, const char *buffer,
- size_t count,
- enum stats_track_type type)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long pid;
-
- rc = kstrtoul(buffer, 10, &pid);
- if (rc)
- return rc;
- sbi->ll_stats_track_id = pid;
- if (pid == 0)
- sbi->ll_stats_track_type = STATS_TRACK_ALL;
- else
- sbi->ll_stats_track_type = type;
- lprocfs_clear_stats(sbi->ll_stats);
- return count;
-}
-
-static ssize_t stats_track_pid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_PID);
-}
-
-static ssize_t stats_track_pid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PID);
-}
-LUSTRE_RW_ATTR(stats_track_pid);
-
-static ssize_t stats_track_ppid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_PPID);
-}
-
-static ssize_t stats_track_ppid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PPID);
-}
-LUSTRE_RW_ATTR(stats_track_ppid);
-
-static ssize_t stats_track_gid_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- return ll_rd_track_id(kobj, buf, STATS_TRACK_GID);
-}
-
-static ssize_t stats_track_gid_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_GID);
-}
-LUSTRE_RW_ATTR(stats_track_gid);
-
-static ssize_t statahead_max_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_sa_max);
-}
-
-static ssize_t statahead_max_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val <= LL_SA_RPC_MAX)
- sbi->ll_sa_max = val;
- else
- CERROR("Bad statahead_max value %lu. Valid values are in the range [0, %d]\n",
- val, LL_SA_RPC_MAX);
-
- return count;
-}
-LUSTRE_RW_ATTR(statahead_max);
-
-static ssize_t statahead_agl_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_AGL_ENABLED ? 1 : 0);
-}
-
-static ssize_t statahead_agl_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val)
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
- else
- sbi->ll_flags &= ~LL_SBI_AGL_ENABLED;
-
- return count;
-}
-LUSTRE_RW_ATTR(statahead_agl);
-
-static int ll_statahead_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-
- seq_printf(m,
- "statahead total: %u\n"
- "statahead wrong: %u\n"
- "agl total: %u\n",
- atomic_read(&sbi->ll_sa_total),
- atomic_read(&sbi->ll_sa_wrong),
- atomic_read(&sbi->ll_agl_total));
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ll_statahead_stats);
-
-static ssize_t lazystatfs_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_LAZYSTATFS ? 1 : 0);
-}
-
-static ssize_t lazystatfs_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val)
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
- else
- sbi->ll_flags &= ~LL_SBI_LAZYSTATFS;
-
- return count;
-}
-LUSTRE_RW_ATTR(lazystatfs);
-
-static ssize_t max_easize_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned int ealen;
- int rc;
-
- rc = ll_get_max_mdsize(sbi, &ealen);
- if (rc)
- return rc;
-
- return sprintf(buf, "%u\n", ealen);
-}
-LUSTRE_RO_ATTR(max_easize);
-
-/**
- * Get default_easize.
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] kobj kernel object for sysfs tree
- * \param[in] attr attribute of this kernel object
- * \param[in] buf buffer to write data into
- *
- * \retval positive \a count on success
- * \retval negative negated errno on failure
- */
-static ssize_t default_easize_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned int ealen;
- int rc;
-
- rc = ll_get_default_mdsize(sbi, &ealen);
- if (rc)
- return rc;
-
- return sprintf(buf, "%u\n", ealen);
-}
-
-/**
- * Set default_easize.
- *
- * Range checking on the passed value is handled by
- * ll_set_default_mdsize().
- *
- * \see client_obd::cl_default_mds_easize
- *
- * \param[in] kobj kernel object for sysfs tree
- * \param[in] attr attribute of this kernel object
- * \param[in] buffer string passed from user space
- * \param[in] count \a buffer length
- *
- * \retval positive \a count on success
- * \retval negative negated errno on failure
- */
-static ssize_t default_easize_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- unsigned long val;
- int rc;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- rc = ll_set_default_mdsize(sbi, val);
- if (rc)
- return rc;
-
- return count;
-}
-LUSTRE_RW_ATTR(default_easize);
-
-static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
-{
- const char *str[] = LL_SBI_FLAGS;
- struct super_block *sb = m->private;
- int flags = ll_s2sbi(sb)->ll_flags;
- int i = 0;
-
- while (flags != 0) {
- if (ARRAY_SIZE(str) <= i) {
- CERROR("%s: Revise array LL_SBI_FLAGS to match sbi flags please.\n",
- ll_get_fsname(sb, NULL, 0));
- return -EINVAL;
- }
-
- if (flags & 0x1)
- seq_printf(m, "%s ", str[i]);
- flags >>= 1;
- ++i;
- }
- seq_puts(m, "\b\n");
- return 0;
-}
-
-LPROC_SEQ_FOPS_RO(ll_sbi_flags);
-
-static ssize_t xattr_cache_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
-
- return sprintf(buf, "%u\n", sbi->ll_xattr_cache_enabled);
-}
-
-static ssize_t xattr_cache_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buffer,
- size_t count)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- int rc;
- unsigned long val;
-
- rc = kstrtoul(buffer, 10, &val);
- if (rc)
- return rc;
-
- if (val != 0 && val != 1)
- return -ERANGE;
-
- if (val == 1 && !(sbi->ll_flags & LL_SBI_XATTR_CACHE))
- return -ENOTSUPP;
-
- sbi->ll_xattr_cache_enabled = val;
-
- return count;
-}
-LUSTRE_RW_ATTR(xattr_cache);
-
-static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct cl_client_cache *cache = sbi->ll_cache;
- long pages;
- int mb;
-
- pages = atomic_long_read(&cache->ccc_unstable_nr);
- mb = (pages * PAGE_SIZE) >> 20;
-
- seq_printf(m,
- "unstable_check: %8d\n"
- "unstable_pages: %12ld\n"
- "unstable_mb: %8d\n",
- cache->ccc_unstable_check, pages, mb);
-
- return 0;
-}
-
-static ssize_t ll_unstable_stats_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct super_block *sb = ((struct seq_file *)file->private_data)->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- char kernbuf[128];
- int val, rc;
-
- if (!count)
- return 0;
- if (count >= sizeof(kernbuf))
- return -EINVAL;
-
- if (copy_from_user(kernbuf, buffer, count))
- return -EFAULT;
- kernbuf[count] = 0;
-
- buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
- kernbuf;
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc < 0)
- return rc;
-
- /* borrow lru lock to set the value */
- spin_lock(&sbi->ll_cache->ccc_lru_lock);
- sbi->ll_cache->ccc_unstable_check = !!val;
- spin_unlock(&sbi->ll_cache->ccc_lru_lock);
-
- return count;
-}
-LPROC_SEQ_FOPS(ll_unstable_stats);
-
-static int ll_root_squash_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
-
- seq_printf(m, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
- return 0;
-}
-
-static ssize_t ll_root_squash_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
-
- return lprocfs_wr_root_squash(buffer, count, squash,
- ll_get_fsname(sb, NULL, 0));
-}
-LPROC_SEQ_FOPS(ll_root_squash);
-
-static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
-{
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
- int len;
-
- down_read(&squash->rsi_sem);
- if (!list_empty(&squash->rsi_nosquash_nids)) {
- len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
- &squash->rsi_nosquash_nids);
- m->count += len;
- seq_puts(m, "\n");
- } else {
- seq_puts(m, "NONE\n");
- }
- up_read(&squash->rsi_sem);
-
- return 0;
-}
-
-static ssize_t ll_nosquash_nids_seq_write(struct file *file,
- const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct super_block *sb = m->private;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct root_squash_info *squash = &sbi->ll_squash;
- int rc;
-
- rc = lprocfs_wr_nosquash_nids(buffer, count, squash,
- ll_get_fsname(sb, NULL, 0));
- if (rc < 0)
- return rc;
-
- ll_compute_rootsquash_state(sbi);
-
- return rc;
-}
-
-LPROC_SEQ_FOPS(ll_nosquash_nids);
-
-static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
- /* { "mntpt_path", ll_rd_path, 0, 0 }, */
- { "site", &ll_site_stats_fops, NULL, 0 },
- /* { "filegroups", lprocfs_rd_filegroups, 0, 0 }, */
- { "max_cached_mb", &ll_max_cached_mb_fops, NULL },
- { "statahead_stats", &ll_statahead_stats_fops, NULL, 0 },
- { "unstable_stats", &ll_unstable_stats_fops, NULL },
- { "sbi_flags", &ll_sbi_flags_fops, NULL, 0 },
- { .name = "root_squash",
- .fops = &ll_root_squash_fops },
- { .name = "nosquash_nids",
- .fops = &ll_nosquash_nids_fops },
- { NULL }
-};
-
-#define MAX_STRING_SIZE 128
-
-static struct attribute *llite_attrs[] = {
- &lustre_attr_blocksize.attr,
- &lustre_attr_kbytestotal.attr,
- &lustre_attr_kbytesfree.attr,
- &lustre_attr_kbytesavail.attr,
- &lustre_attr_filestotal.attr,
- &lustre_attr_filesfree.attr,
- &lustre_attr_client_type.attr,
- &lustre_attr_fstype.attr,
- &lustre_attr_uuid.attr,
- &lustre_attr_max_read_ahead_mb.attr,
- &lustre_attr_max_read_ahead_per_file_mb.attr,
- &lustre_attr_max_read_ahead_whole_mb.attr,
- &lustre_attr_checksum_pages.attr,
- &lustre_attr_stats_track_pid.attr,
- &lustre_attr_stats_track_ppid.attr,
- &lustre_attr_stats_track_gid.attr,
- &lustre_attr_statahead_max.attr,
- &lustre_attr_statahead_agl.attr,
- &lustre_attr_lazystatfs.attr,
- &lustre_attr_max_easize.attr,
- &lustre_attr_default_easize.attr,
- &lustre_attr_xattr_cache.attr,
- NULL,
-};
-
-static void llite_sb_release(struct kobject *kobj)
-{
- struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
- ll_kobj);
- complete(&sbi->ll_kobj_unregister);
-}
-
-static struct kobj_type llite_ktype = {
- .default_attrs = llite_attrs,
- .sysfs_ops = &lustre_sysfs_ops,
- .release = llite_sb_release,
-};
-
-static const struct llite_file_opcode {
- __u32 opcode;
- __u32 type;
- const char *opname;
-} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
- /* file operation */
- { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
- { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
- { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "read_bytes" },
- { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
- "write_bytes" },
- { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
- "brw_read" },
- { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
- "brw_write" },
- { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" },
- { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" },
- { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" },
- { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" },
- { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" },
- { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" },
- { LPROC_LL_READDIR, LPROCFS_TYPE_REGS, "readdir" },
- /* inode operation */
- { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" },
- { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "truncate" },
- { LPROC_LL_FLOCK, LPROCFS_TYPE_REGS, "flock" },
- { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" },
- /* dir inode operation */
- { LPROC_LL_CREATE, LPROCFS_TYPE_REGS, "create" },
- { LPROC_LL_LINK, LPROCFS_TYPE_REGS, "link" },
- { LPROC_LL_UNLINK, LPROCFS_TYPE_REGS, "unlink" },
- { LPROC_LL_SYMLINK, LPROCFS_TYPE_REGS, "symlink" },
- { LPROC_LL_MKDIR, LPROCFS_TYPE_REGS, "mkdir" },
- { LPROC_LL_RMDIR, LPROCFS_TYPE_REGS, "rmdir" },
- { LPROC_LL_MKNOD, LPROCFS_TYPE_REGS, "mknod" },
- { LPROC_LL_RENAME, LPROCFS_TYPE_REGS, "rename" },
- /* special inode operation */
- { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" },
- { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" },
- { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" },
- { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" },
- { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REGS, "getxattr_hits" },
- { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" },
- { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" },
- { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" },
-};
-
-void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count)
-{
- if (!sbi->ll_stats)
- return;
- if (sbi->ll_stats_track_type == STATS_TRACK_ALL)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_PID &&
- sbi->ll_stats_track_id == current->pid)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_PPID &&
- sbi->ll_stats_track_id == current->real_parent->pid)
- lprocfs_counter_add(sbi->ll_stats, op, count);
- else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
- sbi->ll_stats_track_id ==
- from_kgid(&init_user_ns, current_gid()))
- lprocfs_counter_add(sbi->ll_stats, op, count);
-}
-EXPORT_SYMBOL(ll_stats_ops_tally);
-
-static const char *ra_stat_string[] = {
- [RA_STAT_HIT] = "hits",
- [RA_STAT_MISS] = "misses",
- [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive",
- [RA_STAT_MISS_IN_WINDOW] = "miss inside window",
- [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page",
- [RA_STAT_FAILED_MATCH] = "failed lock match",
- [RA_STAT_DISCARDED] = "read but discarded",
- [RA_STAT_ZERO_LEN] = "zero length file",
- [RA_STAT_ZERO_WINDOW] = "zero size window",
- [RA_STAT_EOF] = "read-ahead to EOF",
- [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue",
- [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page",
- [RA_STAT_FAILED_REACH_END] = "failed to reach end"
-};
-
-int ldebugfs_register_mountpoint(struct dentry *parent,
- struct super_block *sb, char *osc, char *mdc)
-{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct dentry *dir;
- char name[MAX_STRING_SIZE + 1], *ptr;
- int err, id, len, rc;
-
- name[MAX_STRING_SIZE] = '\0';
-
- LASSERT(sbi);
- LASSERT(mdc);
- LASSERT(osc);
-
- /* Get fsname */
- len = strlen(lsi->lsi_lmd->lmd_profile);
- ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
- if (ptr && (strcmp(ptr, "-client") == 0))
- len -= 7;
-
- /* Mount info */
- snprintf(name, MAX_STRING_SIZE, "%.*s-%p", len,
- lsi->lsi_lmd->lmd_profile, sb);
-
- dir = ldebugfs_register(name, parent, NULL, NULL);
- if (IS_ERR_OR_NULL(dir)) {
- err = dir ? PTR_ERR(dir) : -ENOMEM;
- sbi->ll_debugfs_entry = NULL;
- return err;
- }
- sbi->ll_debugfs_entry = dir;
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "dump_page_cache", 0444,
- &vvp_dump_pgcache_file_ops, sbi);
- if (rc)
- CWARN("Error adding the dump_page_cache file\n");
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "extents_stats", 0644,
- &ll_rw_extents_stats_fops, sbi);
- if (rc)
- CWARN("Error adding the extent_stats file\n");
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry,
- "extents_stats_per_process",
- 0644, &ll_rw_extents_stats_pp_fops, sbi);
- if (rc)
- CWARN("Error adding the extents_stats_per_process file\n");
-
- rc = ldebugfs_seq_create(sbi->ll_debugfs_entry, "offset_stats", 0644,
- &ll_rw_offset_stats_fops, sbi);
- if (rc)
- CWARN("Error adding the offset_stats file\n");
-
- /* File operations stats */
- sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES,
- LPROCFS_STATS_FLAG_NONE);
- if (!sbi->ll_stats) {
- err = -ENOMEM;
- goto out;
- }
- /* do counter init */
- for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) {
- __u32 type = llite_opcode_table[id].type;
- void *ptr = NULL;
-
- if (type & LPROCFS_TYPE_REGS)
- ptr = "regs";
- else if (type & LPROCFS_TYPE_BYTES)
- ptr = "bytes";
- else if (type & LPROCFS_TYPE_PAGES)
- ptr = "pages";
- lprocfs_counter_init(sbi->ll_stats,
- llite_opcode_table[id].opcode,
- (type & LPROCFS_CNTR_AVGMINMAX),
- llite_opcode_table[id].opname, ptr);
- }
- err = ldebugfs_register_stats(sbi->ll_debugfs_entry, "stats",
- sbi->ll_stats);
- if (err)
- goto out;
-
- sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string),
- LPROCFS_STATS_FLAG_NONE);
- if (!sbi->ll_ra_stats) {
- err = -ENOMEM;
- goto out;
- }
-
- for (id = 0; id < ARRAY_SIZE(ra_stat_string); id++)
- lprocfs_counter_init(sbi->ll_ra_stats, id, 0,
- ra_stat_string[id], "pages");
-
- err = ldebugfs_register_stats(sbi->ll_debugfs_entry, "read_ahead_stats",
- sbi->ll_ra_stats);
- if (err)
- goto out;
-
- err = ldebugfs_add_vars(sbi->ll_debugfs_entry,
- lprocfs_llite_obd_vars, sb);
- if (err)
- goto out;
-
- sbi->ll_kobj.kset = llite_kset;
- init_completion(&sbi->ll_kobj_unregister);
- err = kobject_init_and_add(&sbi->ll_kobj, &llite_ktype, NULL,
- "%s", name);
- if (err)
- goto out;
-
- /* MDC info */
- obd = class_name2obd(mdc);
-
- err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj,
- obd->obd_type->typ_name);
- if (err)
- goto out;
-
- /* OSC */
- obd = class_name2obd(osc);
-
- err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj,
- obd->obd_type->typ_name);
-out:
- if (err) {
- ldebugfs_remove(&sbi->ll_debugfs_entry);
- lprocfs_free_stats(&sbi->ll_ra_stats);
- lprocfs_free_stats(&sbi->ll_stats);
- }
- return err;
-}
-
-void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi)
-{
- if (sbi->ll_debugfs_entry) {
- ldebugfs_remove(&sbi->ll_debugfs_entry);
- kobject_put(&sbi->ll_kobj);
- wait_for_completion(&sbi->ll_kobj_unregister);
- lprocfs_free_stats(&sbi->ll_ra_stats);
- lprocfs_free_stats(&sbi->ll_stats);
- }
-}
-
-#undef MAX_STRING_SIZE
-
-#define pct(a, b) (b ? a * 100 / b : 0)
-
-static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
- struct seq_file *seq, int which)
-{
- unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
- unsigned long start, end, r, w;
- char *unitp = "KMGTPEZY";
- int i, units = 10;
- struct per_process_info *pp_info = &io_extents->pp_extents[which];
-
- read_cum = 0;
- write_cum = 0;
- start = 0;
-
- for (i = 0; i < LL_HIST_MAX; i++) {
- read_tot += pp_info->pp_r_hist.oh_buckets[i];
- write_tot += pp_info->pp_w_hist.oh_buckets[i];
- }
-
- for (i = 0; i < LL_HIST_MAX; i++) {
- r = pp_info->pp_r_hist.oh_buckets[i];
- w = pp_info->pp_w_hist.oh_buckets[i];
- read_cum += r;
- write_cum += w;
- end = 1 << (i + LL_HIST_START - units);
- seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | %14lu %4lu %4lu\n",
- start, *unitp, end, *unitp,
- (i == LL_HIST_MAX - 1) ? '+' : ' ',
- r, pct(r, read_tot), pct(read_cum, read_tot),
- w, pct(w, write_tot), pct(write_cum, write_tot));
- start = end;
- if (start == 1024) {
- start = 1;
- units += 10;
- unitp++;
- }
- if (read_cum == read_tot && write_cum == write_tot)
- break;
- }
-}
-
-static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int k;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
- seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
- "extents", "calls", "%", "cum%",
- "calls", "%", "cum%");
- spin_lock(&sbi->ll_pp_extent_lock);
- for (k = 0; k < LL_PROCESS_HIST_MAX; k++) {
- if (io_extents->pp_extents[k].pid != 0) {
- seq_printf(seq, "\nPID: %d\n",
- io_extents->pp_extents[k].pid);
- ll_display_extents_info(io_extents, seq, k);
- }
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
- return 0;
-}
-
-static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file,
- const char __user *buf,
- size_t len,
- loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int i;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- io_extents->pp_extents[i].pid = 0;
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_extents_stats_pp);
-
-static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (u64)now.tv_sec, (unsigned long)now.tv_nsec);
-
- seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write");
- seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n",
- "extents", "calls", "%", "cum%",
- "calls", "%", "cum%");
- spin_lock(&sbi->ll_lock);
- ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX);
- spin_unlock(&sbi->ll_lock);
-
- return 0;
-}
-
-static ssize_t ll_rw_extents_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
- int i;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- io_extents->pp_extents[i].pid = 0;
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist);
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_extents_stats);
-
-void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
- struct ll_file_data *file, loff_t pos,
- size_t count, int rw)
-{
- int i, cur = -1;
- struct ll_rw_process_info *process;
- struct ll_rw_process_info *offset;
- int *off_count = &sbi->ll_rw_offset_entry_count;
- int *process_count = &sbi->ll_offset_process_count;
- struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info;
-
- if (!sbi->ll_rw_stats_on)
- return;
- process = sbi->ll_rw_process_info;
- offset = sbi->ll_rw_offset_info;
-
- spin_lock(&sbi->ll_pp_extent_lock);
- /* Extent statistics */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (io_extents->pp_extents[i].pid == pid) {
- cur = i;
- break;
- }
- }
-
- if (cur == -1) {
- /* new process */
- sbi->ll_extent_process_count =
- (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX;
- cur = sbi->ll_extent_process_count;
- io_extents->pp_extents[cur].pid = pid;
- lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist);
- lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist);
- }
-
- for (i = 0; (count >= (1 << LL_HIST_START << i)) &&
- (i < (LL_HIST_MAX - 1)); i++)
- ;
- if (rw == 0) {
- io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++;
- io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++;
- } else {
- io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++;
- io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++;
- }
- spin_unlock(&sbi->ll_pp_extent_lock);
-
- spin_lock(&sbi->ll_process_lock);
- /* Offset statistics */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (process[i].rw_pid == pid) {
- if (process[i].rw_last_file != file) {
- process[i].rw_range_start = pos;
- process[i].rw_last_file_pos = pos + count;
- process[i].rw_smallest_extent = count;
- process[i].rw_largest_extent = count;
- process[i].rw_offset = 0;
- process[i].rw_last_file = file;
- spin_unlock(&sbi->ll_process_lock);
- return;
- }
- if (process[i].rw_last_file_pos != pos) {
- *off_count =
- (*off_count + 1) % LL_OFFSET_HIST_MAX;
- offset[*off_count].rw_op = process[i].rw_op;
- offset[*off_count].rw_pid = pid;
- offset[*off_count].rw_range_start =
- process[i].rw_range_start;
- offset[*off_count].rw_range_end =
- process[i].rw_last_file_pos;
- offset[*off_count].rw_smallest_extent =
- process[i].rw_smallest_extent;
- offset[*off_count].rw_largest_extent =
- process[i].rw_largest_extent;
- offset[*off_count].rw_offset =
- process[i].rw_offset;
- process[i].rw_op = rw;
- process[i].rw_range_start = pos;
- process[i].rw_smallest_extent = count;
- process[i].rw_largest_extent = count;
- process[i].rw_offset = pos -
- process[i].rw_last_file_pos;
- }
- if (process[i].rw_smallest_extent > count)
- process[i].rw_smallest_extent = count;
- if (process[i].rw_largest_extent < count)
- process[i].rw_largest_extent = count;
- process[i].rw_last_file_pos = pos + count;
- spin_unlock(&sbi->ll_process_lock);
- return;
- }
- }
- *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX;
- process[*process_count].rw_pid = pid;
- process[*process_count].rw_op = rw;
- process[*process_count].rw_range_start = pos;
- process[*process_count].rw_last_file_pos = pos + count;
- process[*process_count].rw_smallest_extent = count;
- process[*process_count].rw_largest_extent = count;
- process[*process_count].rw_offset = 0;
- process[*process_count].rw_last_file = file;
- spin_unlock(&sbi->ll_process_lock);
-}
-
-static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct timespec64 now;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_process_info *offset = sbi->ll_rw_offset_info;
- struct ll_rw_process_info *process = sbi->ll_rw_process_info;
- int i;
-
- ktime_get_real_ts64(&now);
-
- if (!sbi->ll_rw_stats_on) {
- seq_printf(seq, "disabled\n"
- "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n");
- return 0;
- }
- spin_lock(&sbi->ll_process_lock);
-
- seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n",
- (s64)now.tv_sec, (unsigned long)now.tv_nsec);
- seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n",
- "R/W", "PID", "RANGE START", "RANGE END",
- "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET");
- /* We stored the discontiguous offsets here; print them first */
- for (i = 0; i < LL_OFFSET_HIST_MAX; i++) {
- if (offset[i].rw_pid != 0)
- seq_printf(seq,
- "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
- offset[i].rw_op == READ ? 'R' : 'W',
- offset[i].rw_pid,
- offset[i].rw_range_start,
- offset[i].rw_range_end,
- (unsigned long)offset[i].rw_smallest_extent,
- (unsigned long)offset[i].rw_largest_extent,
- offset[i].rw_offset);
- }
- /* Then print the current offsets for each process */
- for (i = 0; i < LL_PROCESS_HIST_MAX; i++) {
- if (process[i].rw_pid != 0)
- seq_printf(seq,
- "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu",
- process[i].rw_op == READ ? 'R' : 'W',
- process[i].rw_pid,
- process[i].rw_range_start,
- process[i].rw_last_file_pos,
- (unsigned long)process[i].rw_smallest_extent,
- (unsigned long)process[i].rw_largest_extent,
- process[i].rw_offset);
- }
- spin_unlock(&sbi->ll_process_lock);
-
- return 0;
-}
-
-static ssize_t ll_rw_offset_stats_seq_write(struct file *file,
- const char __user *buf,
- size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct ll_sb_info *sbi = seq->private;
- struct ll_rw_process_info *process_info = sbi->ll_rw_process_info;
- struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info;
- int value = 1, rc = 0;
-
- if (len == 0)
- return -EINVAL;
-
- rc = lprocfs_write_helper(buf, len, &value);
-
- if (rc < 0 && len < 16) {
- char kernbuf[16];
-
- if (copy_from_user(kernbuf, buf, len))
- return -EFAULT;
- kernbuf[len] = 0;
-
- if (kernbuf[len - 1] == '\n')
- kernbuf[len - 1] = 0;
-
- if (strcmp(kernbuf, "disabled") == 0 ||
- strcmp(kernbuf, "Disabled") == 0)
- value = 0;
- }
-
- if (value == 0)
- sbi->ll_rw_stats_on = 0;
- else
- sbi->ll_rw_stats_on = 1;
-
- spin_lock(&sbi->ll_process_lock);
- sbi->ll_offset_process_count = 0;
- sbi->ll_rw_offset_entry_count = 0;
- memset(process_info, 0, sizeof(struct ll_rw_process_info) *
- LL_PROCESS_HIST_MAX);
- memset(offset_info, 0, sizeof(struct ll_rw_process_info) *
- LL_OFFSET_HIST_MAX);
- spin_unlock(&sbi->ll_process_lock);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(ll_rw_offset_stats);
-
-void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
-{
- lvars->obd_vars = lprocfs_llite_obd_vars;
-}
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
deleted file mode 100644
index 6c9ec462eb41..000000000000
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ /dev/null
@@ -1,1202 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/security.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_fid.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-static int ll_create_it(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it);
-
-/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
-static int ll_test_inode(struct inode *inode, void *opaque)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lustre_md *md = opaque;
-
- if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) {
- CERROR("MDS body missing FID\n");
- return 0;
- }
-
- if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1))
- return 0;
-
- return 1;
-}
-
-static int ll_set_inode(struct inode *inode, void *opaque)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body = ((struct lustre_md *)opaque)->body;
-
- if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) {
- CERROR("MDS body missing FID\n");
- return -EINVAL;
- }
-
- lli->lli_fid = body->mbo_fid1;
- if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) {
- CERROR("Can not initialize inode " DFID
- " without object type: valid = %#llx\n",
- PFID(&lli->lli_fid), body->mbo_valid);
- return -EINVAL;
- }
-
- inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT);
- if (unlikely(inode->i_mode == 0)) {
- CERROR("Invalid inode " DFID " type\n", PFID(&lli->lli_fid));
- return -EINVAL;
- }
-
- ll_lli_init(lli);
-
- return 0;
-}
-
-/**
- * Get an inode by inode number(@hash), which is already instantiated by
- * the intent lookup).
- */
-struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct lustre_md *md)
-{
- struct inode *inode;
- int rc = 0;
-
- LASSERT(hash != 0);
- inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- if (inode->i_state & I_NEW) {
- rc = ll_read_inode2(inode, md);
- if (!rc && S_ISREG(inode->i_mode) &&
- !ll_i2info(inode)->lli_clob)
- rc = cl_file_inode_init(inode, md);
-
- if (rc) {
- /*
- * Let's clear directory lsm here, otherwise
- * make_bad_inode() will reset the inode mode
- * to regular, then ll_clear_inode will not
- * be able to clear lsm_md
- */
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- make_bad_inode(inode);
- unlock_new_inode(inode);
- iput(inode);
- inode = ERR_PTR(rc);
- } else {
- unlock_new_inode(inode);
- }
- } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
- rc = ll_update_inode(inode, md);
- CDEBUG(D_VFSTRACE, "got inode: " DFID "(%p): rc = %d\n",
- PFID(&md->body->mbo_fid1), inode, rc);
- if (rc) {
- if (S_ISDIR(inode->i_mode))
- ll_dir_clear_lsm_md(inode);
- iput(inode);
- inode = ERR_PTR(rc);
- }
- }
- return inode;
-}
-
-static void ll_invalidate_negative_children(struct inode *dir)
-{
- struct dentry *dentry, *tmp_subdir;
-
- spin_lock(&dir->i_lock);
- hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) {
- spin_lock(&dentry->d_lock);
- if (!list_empty(&dentry->d_subdirs)) {
- struct dentry *child;
-
- list_for_each_entry_safe(child, tmp_subdir,
- &dentry->d_subdirs,
- d_child) {
- if (d_really_is_negative(child))
- d_lustre_invalidate(child, 1);
- }
- }
- spin_unlock(&dentry->d_lock);
- }
- spin_unlock(&dir->i_lock);
-}
-
-int ll_test_inode_by_fid(struct inode *inode, void *opaque)
-{
- return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
-}
-
-int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag)
-{
- struct lustre_handle lockh;
- int rc;
-
- switch (flag) {
- case LDLM_CB_BLOCKING:
- ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
- if (rc < 0) {
- CDEBUG(D_INODE, "ldlm_cli_cancel: rc = %d\n", rc);
- return rc;
- }
- break;
- case LDLM_CB_CANCELING: {
- struct inode *inode = ll_inode_from_resource_lock(lock);
- __u64 bits = lock->l_policy_data.l_inodebits.bits;
-
- /* Inode is set to lock->l_resource->lr_lvb_inode
- * for mdc - bug 24555
- */
- LASSERT(!lock->l_ast_data);
-
- if (!inode)
- break;
-
- /* Invalidate all dentries associated with this inode */
- LASSERT(ldlm_is_canceling(lock));
-
- if (!fid_res_name_eq(ll_inode2fid(inode),
- &lock->l_resource->lr_name)) {
- LDLM_ERROR(lock,
- "data mismatch with object " DFID "(%p)",
- PFID(ll_inode2fid(inode)), inode);
- LBUG();
- }
-
- if (bits & MDS_INODELOCK_XATTR) {
- if (S_ISDIR(inode->i_mode))
- ll_i2info(inode)->lli_def_stripe_offset = -1;
- ll_xattr_cache_destroy(inode);
- bits &= ~MDS_INODELOCK_XATTR;
- }
-
- /* For OPEN locks we differentiate between lock modes
- * LCK_CR, LCK_CW, LCK_PR - bug 22891
- */
- if (bits & MDS_INODELOCK_OPEN)
- ll_have_md_lock(inode, &bits, lock->l_req_mode);
-
- if (bits & MDS_INODELOCK_OPEN) {
- fmode_t fmode;
-
- switch (lock->l_req_mode) {
- case LCK_CW:
- fmode = FMODE_WRITE;
- break;
- case LCK_PR:
- fmode = FMODE_EXEC;
- break;
- case LCK_CR:
- fmode = FMODE_READ;
- break;
- default:
- LDLM_ERROR(lock, "bad lock mode for OPEN lock");
- LBUG();
- }
-
- ll_md_real_close(inode, fmode);
- }
-
- if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
- MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM))
- ll_have_md_lock(inode, &bits, LCK_MINMODE);
-
- if (bits & MDS_INODELOCK_LAYOUT) {
- struct cl_object_conf conf = {
- .coc_opc = OBJECT_CONF_INVALIDATE,
- .coc_inode = inode,
- };
-
- rc = ll_layout_conf(inode, &conf);
- if (rc < 0)
- CDEBUG(D_INODE, "cannot invalidate layout of "
- DFID ": rc = %d\n",
- PFID(ll_inode2fid(inode)), rc);
- }
-
- if (bits & MDS_INODELOCK_UPDATE) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- spin_lock(&lli->lli_lock);
- LTIME_S(inode->i_mtime) = 0;
- LTIME_S(inode->i_atime) = 0;
- LTIME_S(inode->i_ctime) = 0;
- spin_unlock(&lli->lli_lock);
- }
-
- if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CDEBUG(D_INODE, "invalidating inode " DFID " lli = %p, pfid = " DFID "\n",
- PFID(ll_inode2fid(inode)), lli,
- PFID(&lli->lli_pfid));
-
- truncate_inode_pages(inode->i_mapping, 0);
-
- if (unlikely(!fid_is_zero(&lli->lli_pfid))) {
- struct inode *master_inode = NULL;
- unsigned long hash;
-
- /*
- * This is slave inode, since all of the child
- * dentry is connected on the master inode, so
- * we have to invalidate the negative children
- * on master inode
- */
- CDEBUG(D_INODE,
- "Invalidate s" DFID " m" DFID "\n",
- PFID(ll_inode2fid(inode)),
- PFID(&lli->lli_pfid));
-
- hash = cl_fid_build_ino(&lli->lli_pfid,
- ll_need_32bit_api(ll_i2sbi(inode)));
- /*
- * Do not lookup the inode with ilookup5,
- * otherwise it will cause dead lock,
- *
- * 1. Client1 send chmod req to the MDT0, then
- * on MDT0, it enqueues master and all of its
- * slaves lock, (mdt_attr_set() ->
- * mdt_lock_slaves()), after gets master and
- * stripe0 lock, it will send the enqueue req
- * (for stripe1) to MDT1, then MDT1 finds the
- * lock has been granted to client2. Then MDT1
- * sends blocking ast to client2.
- *
- * 2. At the same time, client2 tries to unlink
- * the striped dir (rm -rf striped_dir), and
- * during lookup, it will hold the master inode
- * of the striped directory, whose inode state
- * is NEW, then tries to revalidate all of its
- * slaves, (ll_prep_inode()->ll_iget()->
- * ll_read_inode2()-> ll_update_inode().). And
- * it will be blocked on the server side because
- * of 1.
- *
- * 3. Then the client get the blocking_ast req,
- * cancel the lock, but being blocked if using
- * ->ilookup5()), because master inode state is
- * NEW.
- */
- master_inode = ilookup5_nowait(inode->i_sb,
- hash,
- ll_test_inode_by_fid,
- (void *)&lli->lli_pfid);
- if (master_inode) {
- ll_invalidate_negative_children(master_inode);
- iput(master_inode);
- }
- } else {
- ll_invalidate_negative_children(inode);
- }
- }
-
- if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
- inode->i_sb->s_root &&
- !is_root_inode(inode))
- ll_invalidate_aliases(inode);
-
- iput(inode);
- break;
- }
- default:
- LBUG();
- }
-
- return 0;
-}
-
-__u32 ll_i2suppgid(struct inode *i)
-{
- if (in_group_p(i->i_gid))
- return (__u32)from_kgid(&init_user_ns, i->i_gid);
- else
- return (__u32)(-1);
-}
-
-/* Pack the required supplementary groups into the supplied groups array.
- * If we don't need to use the groups from the target inode(s) then we
- * instead pack one or more groups from the user's supplementary group
- * array in case it might be useful. Not needed if doing an MDS-side upcall.
- */
-void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
-{
- LASSERT(i1);
-
- suppgids[0] = ll_i2suppgid(i1);
-
- if (i2)
- suppgids[1] = ll_i2suppgid(i2);
- else
- suppgids[1] = -1;
-}
-
-/*
- * Try to reuse unhashed or invalidated dentries.
- * This is very similar to d_exact_alias(), and any changes in one should be
- * considered for inclusion in the other. The differences are that we don't
- * need an unhashed alias, and we don't want d_compare to be used for
- * comparison.
- */
-static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
-{
- struct dentry *alias;
-
- if (hlist_empty(&inode->i_dentry))
- return NULL;
-
- spin_lock(&inode->i_lock);
- hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
- LASSERT(alias != dentry);
- /*
- * Don't need alias->d_lock here, because aliases with
- * d_parent == entry->d_parent are not subject to name or
- * parent changes, because the parent inode i_mutex is held.
- */
-
- if (alias->d_parent != dentry->d_parent)
- continue;
- if (alias->d_name.hash != dentry->d_name.hash)
- continue;
- if (alias->d_name.len != dentry->d_name.len ||
- memcmp(alias->d_name.name, dentry->d_name.name,
- dentry->d_name.len) != 0)
- continue;
- spin_lock(&alias->d_lock);
- dget_dlock(alias);
- spin_unlock(&alias->d_lock);
- spin_unlock(&inode->i_lock);
- return alias;
- }
- spin_unlock(&inode->i_lock);
-
- return NULL;
-}
-
-/*
- * Similar to d_splice_alias(), but lustre treats invalid alias
- * similar to DCACHE_DISCONNECTED, and tries to use it anyway.
- */
-struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
-{
- if (inode && !S_ISDIR(inode->i_mode)) {
- struct dentry *new = ll_find_alias(inode, de);
-
- if (new) {
- d_move(new, de);
- iput(inode);
- CDEBUG(D_DENTRY,
- "Reuse dentry %p inode %p refc %d flags %#x\n",
- new, d_inode(new), d_count(new), new->d_flags);
- return new;
- }
- d_add(de, inode);
- } else {
- struct dentry *new = d_splice_alias(inode, de);
-
- if (new)
- de = new;
- }
- CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
- de, d_inode(de), d_count(de), de->d_flags);
- return de;
-}
-
-static int ll_lookup_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it,
- struct inode *parent, struct dentry **de)
-{
- struct inode *inode = NULL;
- __u64 bits = 0;
- int rc = 0;
- struct dentry *alias;
-
- /* NB 1 request reference will be taken away by ll_intent_lock()
- * when I return
- */
- CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
- it->it_disposition);
- if (!it_disposition(it, DISP_LOOKUP_NEG)) {
- rc = ll_prep_inode(&inode, request, (*de)->d_sb, it);
- if (rc)
- return rc;
-
- ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
-
- /* We used to query real size from OSTs here, but actually
- * this is not needed. For stat() calls size would be updated
- * from subsequent do_revalidate()->ll_inode_revalidate_it() in
- * 2.4 and
- * vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
- * Everybody else who needs correct file size would call
- * ll_glimpse_size or some equivalent themselves anyway.
- * Also see bug 7198.
- */
- }
-
- alias = ll_splice_alias(inode, *de);
- if (IS_ERR(alias)) {
- rc = PTR_ERR(alias);
- goto out;
- }
- *de = alias;
-
- if (!it_disposition(it, DISP_LOOKUP_NEG)) {
- /* We have the "lookup" lock, so unhide dentry */
- if (bits & MDS_INODELOCK_LOOKUP)
- d_lustre_revalidate(*de);
- } else if (!it_disposition(it, DISP_OPEN_CREATE)) {
- /* If file created on server, don't depend on parent UPDATE
- * lock to unhide it. It is left hidden and next lookup can
- * find it in ll_splice_alias.
- */
- /* Check that parent has UPDATE lock. */
- struct lookup_intent parent_it = {
- .it_op = IT_GETATTR,
- .it_lock_handle = 0 };
- struct lu_fid fid = ll_i2info(parent)->lli_fid;
-
- /* If it is striped directory, get the real stripe parent */
- if (unlikely(ll_i2info(parent)->lli_lsm_md)) {
- rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
- ll_i2info(parent)->lli_lsm_md,
- (*de)->d_name.name,
- (*de)->d_name.len, &fid);
- if (rc)
- return rc;
- }
-
- if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid,
- NULL)) {
- d_lustre_revalidate(*de);
- ll_intent_release(&parent_it);
- }
- }
-
-out:
- if (rc != 0 && it->it_op & IT_OPEN)
- ll_open_cleanup((*de)->d_sb, request);
-
- return rc;
-}
-
-static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
- struct lookup_intent *it)
-{
- struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
- struct dentry *save = dentry, *retval;
- struct ptlrpc_request *req = NULL;
- struct md_op_data *op_data = NULL;
- struct inode *inode;
- __u32 opc;
- int rc;
-
- if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
- return ERR_PTR(-ENAMETOOLONG);
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),intent=%s\n",
- dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it));
-
- if (d_mountpoint(dentry))
- CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
-
- if (!it || it->it_op == IT_GETXATTR)
- it = &lookup_it;
-
- if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) {
- rc = ll_statahead(parent, &dentry, 0);
- if (rc == 1) {
- if (dentry == save)
- retval = NULL;
- else
- retval = dentry;
- goto out;
- }
- }
-
- if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE && sb_rdonly(dentry->d_sb))
- return ERR_PTR(-EROFS);
-
- if (it->it_op & IT_CREAT)
- opc = LUSTRE_OPC_CREATE;
- else
- opc = LUSTRE_OPC_ANY;
-
- op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
- dentry->d_name.len, 0, opc, NULL);
- if (IS_ERR(op_data))
- return (void *)op_data;
-
- /* enforce umask if acl disabled or MDS doesn't support umask */
- if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
- it->it_create_mode &= ~current_umask();
-
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- &ll_md_blocking_ast, 0);
- /*
- * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the
- * client does not know which suppgid should be sent to the MDS, or
- * some other(s) changed the target file's GID after this RPC sent
- * to the MDS with the suppgid as the original GID, then we should
- * try again with right suppgid.
- */
- if (rc == -EACCES && it->it_op & IT_OPEN &&
- it_disposition(it, DISP_OPEN_DENY)) {
- struct mdt_body *body;
-
- LASSERT(req);
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (op_data->op_suppgids[0] == body->mbo_gid ||
- op_data->op_suppgids[1] == body->mbo_gid ||
- !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) {
- retval = ERR_PTR(-EACCES);
- goto out;
- }
-
- fid_zero(&op_data->op_fid2);
- op_data->op_suppgids[1] = body->mbo_gid;
- ptlrpc_req_finished(req);
- req = NULL;
- ll_intent_release(it);
- rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
- ll_md_blocking_ast, 0);
- }
-
- if (rc < 0) {
- retval = ERR_PTR(rc);
- goto out;
- }
-
- rc = ll_lookup_it_finish(req, it, parent, &dentry);
- if (rc != 0) {
- ll_intent_release(it);
- retval = ERR_PTR(rc);
- goto out;
- }
-
- inode = d_inode(dentry);
- if ((it->it_op & IT_OPEN) && inode &&
- !S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode)) {
- ll_release_openhandle(inode, it);
- }
- ll_lookup_finish_locks(it, inode);
-
- if (dentry == save)
- retval = NULL;
- else
- retval = dentry;
-out:
- if (op_data && !IS_ERR(op_data))
- ll_finish_md_op_data(op_data);
-
- ptlrpc_req_finished(req);
- return retval;
-}
-
-static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
- unsigned int flags)
-{
- struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
- struct dentry *de;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),flags=%u\n",
- dentry, PFID(ll_inode2fid(parent)), parent, flags);
-
- /* Optimize away (CREATE && !OPEN). Let .create handle the race.
- * but only if we have write permissions there, otherwise we need
- * to proceed with lookup. LU-4185
- */
- if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
- (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0))
- return NULL;
-
- if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
- itp = NULL;
- else
- itp = &it;
- de = ll_lookup_it(parent, dentry, itp);
-
- if (itp)
- ll_intent_release(itp);
-
- return de;
-}
-
-/*
- * For cached negative dentry and new dentry, handle lookup/create/open
- * together.
- */
-static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned int open_flags,
- umode_t mode, int *opened)
-{
- struct lookup_intent *it;
- struct dentry *de;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),file %p,open_flags %x,mode %x opened %d\n",
- dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
- *opened);
-
- /* Only negative dentries enter here */
- LASSERT(!d_inode(dentry));
-
- if (!d_in_lookup(dentry)) {
- /* A valid negative dentry that just passed revalidation,
- * there's little point to try and open it server-side,
- * even though there's a minuscle chance it might succeed.
- * Either way it's a valid race to just return -ENOENT here.
- */
- if (!(open_flags & O_CREAT))
- return -ENOENT;
-
- /* Otherwise we just unhash it to be rehashed afresh via
- * lookup if necessary
- */
- d_drop(dentry);
- }
-
- it = kzalloc(sizeof(*it), GFP_NOFS);
- if (!it)
- return -ENOMEM;
-
- it->it_op = IT_OPEN;
- if (open_flags & O_CREAT)
- it->it_op |= IT_CREAT;
- it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
- it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
- it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
-
- /* Dentry added to dcache tree in ll_lookup_it */
- de = ll_lookup_it(dir, dentry, it);
- if (IS_ERR(de))
- rc = PTR_ERR(de);
- else if (de)
- dentry = de;
-
- if (!rc) {
- if (it_disposition(it, DISP_OPEN_CREATE)) {
- /* Dentry instantiated in ll_create_it. */
- rc = ll_create_it(dir, dentry, it);
- if (rc) {
- /* We dget in ll_splice_alias. */
- if (de)
- dput(de);
- goto out_release;
- }
-
- *opened |= FILE_CREATED;
- }
- if (d_really_is_positive(dentry) &&
- it_disposition(it, DISP_OPEN_OPEN)) {
- /* Open dentry. */
- if (S_ISFIFO(d_inode(dentry)->i_mode)) {
- /* We cannot call open here as it might
- * deadlock. This case is unreachable in
- * practice because of OBD_CONNECT_NODEVOH.
- */
- rc = finish_no_open(file, de);
- } else {
- file->private_data = it;
- rc = finish_open(file, dentry, NULL, opened);
- /* We dget in ll_splice_alias. finish_open takes
- * care of dget for fd open.
- */
- if (de)
- dput(de);
- }
- } else {
- rc = finish_no_open(file, de);
- }
- }
-
-out_release:
- ll_intent_release(it);
- kfree(it);
-
- return rc;
-}
-
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
-{
- struct inode *inode = NULL;
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int rc;
-
- LASSERT(it && it->it_disposition);
-
- LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
- request = it->it_request;
- it_clear_disposition(it, DISP_ENQ_CREATE_REF);
- rc = ll_prep_inode(&inode, request, dir->i_sb, it);
- if (rc) {
- inode = ERR_PTR(rc);
- goto out;
- }
-
- LASSERT(hlist_empty(&inode->i_dentry));
-
- /* We asked for a lock on the directory, but were granted a
- * lock on the inode. Since we finally have an inode pointer,
- * stuff it in the lock.
- */
- CDEBUG(D_DLMTRACE, "setting l_ast_data to inode " DFID "(%p)\n",
- PFID(ll_inode2fid(dir)), inode);
- ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL);
- out:
- ptlrpc_req_finished(request);
- return inode;
-}
-
-/*
- * By the time this is called, we already have created the directory cache
- * entry for the new file, but it is so far negative - it has no inode.
- *
- * We defer creating the OBD object(s) until open, to keep the intent and
- * non-intent code paths similar, and also because we do not have the MDS
- * inode number before calling ll_create_node() (which is needed for LOV),
- * so we would need to do yet another RPC to the MDS to store the LOV EA
- * data on the MDS. If needed, we would pass the PACKED lmm as data and
- * lmm_size in datalen (the MDS still has code which will handle that).
- *
- * If the create succeeds, we fill in the inode information
- * with d_instantiate().
- */
-static int ll_create_it(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it)
-{
- struct inode *inode;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p), intent=%s\n",
- dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it));
-
- rc = it_open_error(DISP_OPEN_CREATE, it);
- if (rc)
- return rc;
-
- inode = ll_create_node(dir, it);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- d_instantiate(dentry, inode);
-
- return ll_init_security(dentry, inode, dir);
-}
-
-void ll_update_times(struct ptlrpc_request *request, struct inode *inode)
-{
- struct mdt_body *body = req_capsule_server_get(&request->rq_pill,
- &RMF_MDT_BODY);
-
- LASSERT(body);
- if (body->mbo_valid & OBD_MD_FLMTIME &&
- body->mbo_mtime > LTIME_S(inode->i_mtime)) {
- CDEBUG(D_INODE, "setting fid " DFID " mtime from %lu to %llu\n",
- PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
- body->mbo_mtime);
- LTIME_S(inode->i_mtime) = body->mbo_mtime;
- }
- if (body->mbo_valid & OBD_MD_FLCTIME &&
- body->mbo_ctime > LTIME_S(inode->i_ctime))
- LTIME_S(inode->i_ctime) = body->mbo_ctime;
-}
-
-static int ll_new_node(struct inode *dir, struct dentry *dentry,
- const char *tgt, umode_t mode, int rdev,
- __u32 opc)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- struct inode *inode = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- int tgt_len = 0;
- int err;
-
- if (unlikely(tgt))
- tgt_len = strlen(tgt) + 1;
-again:
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dentry->d_name.name,
- dentry->d_name.len,
- 0, opc, NULL);
- if (IS_ERR(op_data)) {
- err = PTR_ERR(op_data);
- goto err_exit;
- }
-
- err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
- from_kuid(&init_user_ns, current_fsuid()),
- from_kgid(&init_user_ns, current_fsgid()),
- cfs_curproc_cap_pack(), rdev, &request);
- ll_finish_md_op_data(op_data);
- if (err < 0 && err != -EREMOTE)
- goto err_exit;
-
- /*
- * If the client doesn't know where to create a subdirectory (or
- * in case of a race that sends the RPC to the wrong MDS), the
- * MDS will return -EREMOTE and the client will fetch the layout
- * of the directory, then create the directory on the right MDT.
- */
- if (unlikely(err == -EREMOTE)) {
- struct ll_inode_info *lli = ll_i2info(dir);
- struct lmv_user_md *lum;
- int lumsize, err2;
-
- ptlrpc_req_finished(request);
- request = NULL;
-
- err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
- OBD_MD_DEFAULT_MEA);
- if (!err2) {
- /* Update stripe_offset and retry */
- lli->lli_def_stripe_offset = lum->lum_stripe_offset;
- } else if (err2 == -ENODATA &&
- lli->lli_def_stripe_offset != -1) {
- /*
- * If there are no default stripe EA on the MDT, but the
- * client has default stripe, then it probably means
- * default stripe EA has just been deleted.
- */
- lli->lli_def_stripe_offset = -1;
- } else {
- goto err_exit;
- }
-
- ptlrpc_req_finished(request);
- request = NULL;
- goto again;
- }
-
- ll_update_times(request, dir);
-
- err = ll_prep_inode(&inode, request, dir->i_sb, NULL);
- if (err)
- goto err_exit;
-
- d_instantiate(dentry, inode);
-
- err = ll_init_security(dentry, inode, dir);
-err_exit:
- if (request)
- ptlrpc_req_finished(request);
-
- return err;
-}
-
-static int ll_mknod(struct inode *dir, struct dentry *dchild,
- umode_t mode, dev_t rdev)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p) mode %o dev %x\n",
- dchild, PFID(ll_inode2fid(dir)), dir, mode,
- old_encode_dev(rdev));
-
- if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
- mode &= ~current_umask();
-
- switch (mode & S_IFMT) {
- case 0:
- mode |= S_IFREG;
- /* for mode = 0 case */
- /* fall through */
- case S_IFREG:
- case S_IFCHR:
- case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
- err = ll_new_node(dir, dchild, NULL, mode,
- old_encode_dev(rdev),
- LUSTRE_OPC_MKNOD);
- break;
- case S_IFDIR:
- err = -EPERM;
- break;
- default:
- err = -EINVAL;
- }
-
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, 1);
-
- return err;
-}
-
-/*
- * Plain create. Intent create is handled in atomic_open.
- */
-static int ll_create_nd(struct inode *dir, struct dentry *dentry,
- umode_t mode, bool want_excl)
-{
- int rc;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:name=%pd, dir=" DFID "(%p), flags=%u, excl=%d\n",
- dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl);
-
- rc = ll_mknod(dir, dentry, mode, 0);
-
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1);
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, unhashed %d\n",
- dentry, d_unhashed(dentry));
-
- return rc;
-}
-
-static int ll_unlink(struct inode *dir, struct dentry *dchild)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n",
- dchild, dir->i_ino, dir->i_generation, dir);
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dchild->d_name.name,
- dchild->d_name.len,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
-
- op_data->op_fid2 = op_data->op_fid3;
- rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (rc)
- goto out;
-
- ll_update_times(request, dir);
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1);
-
- out:
- ptlrpc_req_finished(request);
- return rc;
-}
-
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir" DFID "(%p)\n",
- dentry, PFID(ll_inode2fid(dir)), dir);
-
- if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
- mode &= ~current_umask();
- mode = (mode & (0777 | S_ISVTX)) | S_IFDIR;
-
- err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1);
-
- return err;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dchild)
-{
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p)\n",
- dchild, PFID(ll_inode2fid(dir)), dir);
-
- op_data = ll_prep_md_op_data(NULL, dir, NULL,
- dchild->d_name.name,
- dchild->d_name.len,
- S_IFDIR, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
-
- op_data->op_fid2 = op_data->op_fid3;
- rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (rc == 0) {
- ll_update_times(request, dir);
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1);
- }
-
- ptlrpc_req_finished(request);
- return rc;
-}
-
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
- const char *oldname)
-{
- int err;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),target=%.*s\n",
- dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname);
-
- err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777,
- 0, LUSTRE_OPC_SYMLINK);
-
- if (!err)
- ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, 1);
-
- return err;
-}
-
-static int ll_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry)
-{
- struct inode *src = d_inode(old_dentry);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ptlrpc_request *request = NULL;
- struct md_op_data *op_data;
- int err;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op: inode=" DFID "(%p), dir=" DFID "(%p), target=%pd\n",
- PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir,
- new_dentry);
-
- op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name,
- new_dentry->d_name.len,
- 0, LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- err = md_link(sbi->ll_md_exp, op_data, &request);
- ll_finish_md_op_data(op_data);
- if (err)
- goto out;
-
- ll_update_times(request, dir);
- ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1);
-out:
- ptlrpc_req_finished(request);
- return err;
-}
-
-static int ll_rename(struct inode *src, struct dentry *src_dchild,
- struct inode *tgt, struct dentry *tgt_dchild,
- unsigned int flags)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(src);
- struct md_op_data *op_data;
- int err;
-
- if (flags)
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE,
- "VFS Op:oldname=%pd, src_dir=" DFID "(%p), newname=%pd, tgt_dir=" DFID "(%p)\n",
- src_dchild, PFID(ll_inode2fid(src)), src,
- tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
-
- op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- if (src_dchild->d_inode)
- op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
- if (tgt_dchild->d_inode)
- op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
-
- err = md_rename(sbi->ll_md_exp, op_data,
- src_dchild->d_name.name,
- src_dchild->d_name.len,
- tgt_dchild->d_name.name,
- tgt_dchild->d_name.len, &request);
- ll_finish_md_op_data(op_data);
- if (!err) {
- ll_update_times(request, src);
- ll_update_times(request, tgt);
- ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
- }
-
- ptlrpc_req_finished(request);
- if (!err)
- d_move(src_dchild, tgt_dchild);
- return err;
-}
-
-const struct inode_operations ll_dir_inode_operations = {
- .mknod = ll_mknod,
- .atomic_open = ll_atomic_open,
- .lookup = ll_lookup_nd,
- .create = ll_create_nd,
- /* We need all these non-raw things for NFSD, to not patch it. */
- .unlink = ll_unlink,
- .mkdir = ll_mkdir,
- .rmdir = ll_rmdir,
- .symlink = ll_symlink,
- .link = ll_link,
- .rename = ll_rename,
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .get_acl = ll_get_acl,
-};
-
-const struct inode_operations ll_special_inode_operations = {
- .setattr = ll_setattr,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
- .get_acl = ll_get_acl,
-};
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c
deleted file mode 100644
index cc9565f6bfe2..000000000000
--- a/drivers/staging/lustre/lustre/llite/range_lock.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Range lock is used to allow multiple threads writing a single shared
- * file given each thread is writing to a non-overlapping portion of the
- * file.
- *
- * Refer to the possible upstream kernel version of range lock by
- * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
- *
- * This file could later replaced by the upstream kernel version.
- */
-/*
- * Author: Prakash Surya <surya1@llnl.gov>
- * Author: Bobi Jam <bobijam.xu@intel.com>
- */
-#include "range_lock.h"
-#include <uapi/linux/lustre/lustre_idl.h>
-
-/**
- * Initialize a range lock tree
- *
- * \param tree [in] an empty range lock tree
- *
- * Pre: Caller should have allocated the range lock tree.
- * Post: The range lock tree is ready to function.
- */
-void range_lock_tree_init(struct range_lock_tree *tree)
-{
- tree->rlt_root = NULL;
- tree->rlt_sequence = 0;
- spin_lock_init(&tree->rlt_lock);
-}
-
-/**
- * Initialize a range lock node
- *
- * \param lock [in] an empty range lock node
- * \param start [in] start of the covering region
- * \param end [in] end of the covering region
- *
- * Pre: Caller should have allocated the range lock node.
- * Post: The range lock node is meant to cover [start, end] region
- */
-int range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
-{
- int rc;
-
- memset(&lock->rl_node, 0, sizeof(lock->rl_node));
- if (end != LUSTRE_EOF)
- end >>= PAGE_SHIFT;
- rc = interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
- if (rc)
- return rc;
-
- INIT_LIST_HEAD(&lock->rl_next_lock);
- lock->rl_task = NULL;
- lock->rl_lock_count = 0;
- lock->rl_blocking_ranges = 0;
- lock->rl_sequence = 0;
- return rc;
-}
-
-static inline struct range_lock *next_lock(struct range_lock *lock)
-{
- return list_entry(lock->rl_next_lock.next, typeof(*lock), rl_next_lock);
-}
-
-/**
- * Helper function of range_unlock()
- *
- * \param node [in] a range lock found overlapped during interval node
- * search
- * \param arg [in] the range lock to be tested
- *
- * \retval INTERVAL_ITER_CONT indicate to continue the search for next
- * overlapping range node
- * \retval INTERVAL_ITER_STOP indicate to stop the search
- */
-static enum interval_iter range_unlock_cb(struct interval_node *node, void *arg)
-{
- struct range_lock *lock = arg;
- struct range_lock *overlap = node2rangelock(node);
- struct range_lock *iter;
-
- list_for_each_entry(iter, &overlap->rl_next_lock, rl_next_lock) {
- if (iter->rl_sequence > lock->rl_sequence) {
- --iter->rl_blocking_ranges;
- LASSERT(iter->rl_blocking_ranges > 0);
- }
- }
- if (overlap->rl_sequence > lock->rl_sequence) {
- --overlap->rl_blocking_ranges;
- if (overlap->rl_blocking_ranges == 0)
- wake_up_process(overlap->rl_task);
- }
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Unlock a range lock, wake up locks blocked by this lock.
- *
- * \param tree [in] range lock tree
- * \param lock [in] range lock to be deleted
- *
- * If this lock has been granted, relase it; if not, just delete it from
- * the tree or the same region lock list. Wake up those locks only blocked
- * by this lock through range_unlock_cb().
- */
-void range_unlock(struct range_lock_tree *tree, struct range_lock *lock)
-{
- spin_lock(&tree->rlt_lock);
- if (!list_empty(&lock->rl_next_lock)) {
- struct range_lock *next;
-
- if (interval_is_intree(&lock->rl_node)) { /* first lock */
- /* Insert the next same range lock into the tree */
- next = next_lock(lock);
- next->rl_lock_count = lock->rl_lock_count - 1;
- interval_erase(&lock->rl_node, &tree->rlt_root);
- interval_insert(&next->rl_node, &tree->rlt_root);
- } else {
- /* find the first lock in tree */
- list_for_each_entry(next, &lock->rl_next_lock,
- rl_next_lock) {
- if (!interval_is_intree(&next->rl_node))
- continue;
-
- LASSERT(next->rl_lock_count > 0);
- next->rl_lock_count--;
- break;
- }
- }
- list_del_init(&lock->rl_next_lock);
- } else {
- LASSERT(interval_is_intree(&lock->rl_node));
- interval_erase(&lock->rl_node, &tree->rlt_root);
- }
-
- interval_search(tree->rlt_root, &lock->rl_node.in_extent,
- range_unlock_cb, lock);
- spin_unlock(&tree->rlt_lock);
-}
-
-/**
- * Helper function of range_lock()
- *
- * \param node [in] a range lock found overlapped during interval node
- * search
- * \param arg [in] the range lock to be tested
- *
- * \retval INTERVAL_ITER_CONT indicate to continue the search for next
- * overlapping range node
- * \retval INTERVAL_ITER_STOP indicate to stop the search
- */
-static enum interval_iter range_lock_cb(struct interval_node *node, void *arg)
-{
- struct range_lock *lock = arg;
- struct range_lock *overlap = node2rangelock(node);
-
- lock->rl_blocking_ranges += overlap->rl_lock_count + 1;
- return INTERVAL_ITER_CONT;
-}
-
-/**
- * Lock a region
- *
- * \param tree [in] range lock tree
- * \param lock [in] range lock node containing the region span
- *
- * \retval 0 get the range lock
- * \retval <0 error code while not getting the range lock
- *
- * If there exists overlapping range lock, the new lock will wait and
- * retry, if later it find that it is not the chosen one to wake up,
- * it wait again.
- */
-int range_lock(struct range_lock_tree *tree, struct range_lock *lock)
-{
- struct interval_node *node;
- int rc = 0;
-
- spin_lock(&tree->rlt_lock);
- /*
- * We need to check for all conflicting intervals
- * already in the tree.
- */
- interval_search(tree->rlt_root, &lock->rl_node.in_extent,
- range_lock_cb, lock);
- /*
- * Insert to the tree if I am unique, otherwise I've been linked to
- * the rl_next_lock of another lock which has the same range as mine
- * in range_lock_cb().
- */
- node = interval_insert(&lock->rl_node, &tree->rlt_root);
- if (node) {
- struct range_lock *tmp = node2rangelock(node);
-
- list_add_tail(&lock->rl_next_lock, &tmp->rl_next_lock);
- tmp->rl_lock_count++;
- }
- lock->rl_sequence = ++tree->rlt_sequence;
-
- while (lock->rl_blocking_ranges > 0) {
- lock->rl_task = current;
- __set_current_state(TASK_INTERRUPTIBLE);
- spin_unlock(&tree->rlt_lock);
- schedule();
-
- if (signal_pending(current)) {
- range_unlock(tree, lock);
- rc = -EINTR;
- goto out;
- }
- spin_lock(&tree->rlt_lock);
- }
- spin_unlock(&tree->rlt_lock);
-out:
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h
deleted file mode 100644
index 38b2be4e378f..000000000000
--- a/drivers/staging/lustre/lustre/llite/range_lock.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Range lock is used to allow multiple threads writing a single shared
- * file given each thread is writing to a non-overlapping portion of the
- * file.
- *
- * Refer to the possible upstream kernel version of range lock by
- * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
- *
- * This file could later replaced by the upstream kernel version.
- */
-/*
- * Author: Prakash Surya <surya1@llnl.gov>
- * Author: Bobi Jam <bobijam.xu@intel.com>
- */
-#ifndef _RANGE_LOCK_H
-#define _RANGE_LOCK_H
-
-#include <linux/libcfs/libcfs.h>
-#include <interval_tree.h>
-
-struct range_lock {
- struct interval_node rl_node;
- /**
- * Process to enqueue this lock.
- */
- struct task_struct *rl_task;
- /**
- * List of locks with the same range.
- */
- struct list_head rl_next_lock;
- /**
- * Number of locks in the list rl_next_lock
- */
- unsigned int rl_lock_count;
- /**
- * Number of ranges which are blocking acquisition of the lock
- */
- unsigned int rl_blocking_ranges;
- /**
- * Sequence number of range lock. This number is used to get to know
- * the order the locks are queued; this is required for range_cancel().
- */
- __u64 rl_sequence;
-};
-
-static inline struct range_lock *node2rangelock(const struct interval_node *n)
-{
- return container_of(n, struct range_lock, rl_node);
-}
-
-struct range_lock_tree {
- struct interval_node *rlt_root;
- spinlock_t rlt_lock; /* protect range lock tree */
- __u64 rlt_sequence;
-};
-
-void range_lock_tree_init(struct range_lock_tree *tree);
-int range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
-int range_lock(struct range_lock_tree *tree, struct range_lock *lock);
-void range_unlock(struct range_lock_tree *tree, struct range_lock *lock);
-#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
deleted file mode 100644
index 3e008ce7275d..000000000000
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ /dev/null
@@ -1,1214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/llite/rw.c
- *
- * Lustre Lite I/O page cache routines shared by different kernel revs
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/writeback.h>
-#include <linux/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-/* current_is_kswapd() */
-#include <linux/swap.h>
-#include <linux/bvec.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_cksum.h>
-#include "llite_internal.h"
-
-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
-
-/**
- * Get readahead pages from the filesystem readahead pool of the client for a
- * thread.
- *
- * /param sbi superblock for filesystem readahead state ll_ra_info
- * /param ria per-thread readahead state
- * /param pages number of pages requested for readahead for the thread.
- *
- * WARNING: This algorithm is used to reduce contention on sbi->ll_lock.
- * It should work well if the ra_max_pages is much greater than the single
- * file's read-ahead window, and not too many threads contending for
- * these readahead pages.
- *
- * TODO: There may be a 'global sync problem' if many threads are trying
- * to get an ra budget that is larger than the remaining readahead pages
- * and reach here at exactly the same time. They will compute /a ret to
- * consume the remaining pages, but will fail at atomic_add_return() and
- * get a zero ra window, although there is still ra space remaining. - Jay
- */
-static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
- struct ra_io_arg *ria,
- unsigned long pages, unsigned long min)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- long ret;
-
- /* If read-ahead pages left are less than 1M, do not do read-ahead,
- * otherwise it will form small read RPC(< 1M), which hurt server
- * performance a lot.
- */
- ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages);
- if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages)) {
- ret = 0;
- goto out;
- }
-
- if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) {
- atomic_sub(ret, &ra->ra_cur_pages);
- ret = 0;
- }
-
-out:
- if (ret < min) {
- /* override ra limit for maximum performance */
- atomic_add(min - ret, &ra->ra_cur_pages);
- ret = min;
- }
- return ret;
-}
-
-void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
-
- atomic_sub(len, &ra->ra_cur_pages);
-}
-
-static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
-{
- LASSERTF(which < _NR_RA_STAT, "which: %u\n", which);
- lprocfs_counter_incr(sbi->ll_ra_stats, which);
-}
-
-void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-
- ll_ra_stats_inc_sbi(sbi, which);
-}
-
-#define RAS_CDEBUG(ras) \
- CDEBUG(D_READA, \
- "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \
- "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \
- ras->ras_last_readpage, ras->ras_consecutive_requests, \
- ras->ras_consecutive_pages, ras->ras_window_start, \
- ras->ras_window_len, ras->ras_next_readahead, \
- ras->ras_rpc_size, \
- ras->ras_requests, ras->ras_request_index, \
- ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
- ras->ras_stride_pages, ras->ras_stride_length)
-
-static int index_in_window(unsigned long index, unsigned long point,
- unsigned long before, unsigned long after)
-{
- unsigned long start = point - before, end = point + after;
-
- if (start > point)
- start = 0;
- if (end < point)
- end = ~0;
-
- return start <= index && index <= end;
-}
-
-void ll_ras_enter(struct file *f)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(f);
- struct ll_readahead_state *ras = &fd->fd_ras;
-
- spin_lock(&ras->ras_lock);
- ras->ras_requests++;
- ras->ras_request_index = 0;
- ras->ras_consecutive_requests++;
- spin_unlock(&ras->ras_lock);
-}
-
-/**
- * Initiates read-ahead of a page with given index.
- *
- * \retval +ve: page was already uptodate so it will be skipped
- * from being added;
- * \retval -ve: page wasn't added to \a queue for error;
- * \retval 0: page was added into \a queue for read ahead.
- */
-static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, pgoff_t index)
-{
- enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
- struct cl_object *clob = io->ci_obj;
- struct inode *inode = vvp_object_inode(clob);
- const char *msg = NULL;
- struct cl_page *page;
- struct vvp_page *vpg;
- struct page *vmpage;
- int rc = 0;
-
- vmpage = grab_cache_page_nowait(inode->i_mapping, index);
- if (!vmpage) {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "g_c_p_n failed";
- rc = -EBUSY;
- goto out;
- }
-
- /* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping != inode->i_mapping) {
- which = RA_STAT_WRONG_GRAB_PAGE;
- msg = "g_c_p_n returned invalid page";
- rc = -EBUSY;
- goto out;
- }
-
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- which = RA_STAT_FAILED_GRAB_PAGE;
- msg = "cl_page_find failed";
- rc = PTR_ERR(page);
- goto out;
- }
-
- lu_ref_add(&page->cp_reference, "ra", current);
- cl_page_assume(env, io, page);
- vpg = cl2vvp_page(cl_object_page_slice(clob, page));
- if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) {
- vpg->vpg_defer_uptodate = 1;
- vpg->vpg_ra_used = 0;
- cl_page_list_add(queue, page);
- } else {
- /* skip completed pages */
- cl_page_unassume(env, io, page);
- /* This page is already uptodate, returning a positive number
- * to tell the callers about this
- */
- rc = 1;
- }
-
- lu_ref_del(&page->cp_reference, "ra", current);
- cl_page_put(env, page);
-out:
- if (vmpage) {
- if (rc)
- unlock_page(vmpage);
- put_page(vmpage);
- }
- if (msg) {
- ll_ra_stats_inc(inode, which);
- CDEBUG(D_READA, "%s\n", msg);
- }
- return rc;
-}
-
-#define RIA_DEBUG(ria) \
- CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
- ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
- ria->ria_pages)
-
-static inline int stride_io_mode(struct ll_readahead_state *ras)
-{
- return ras->ras_consecutive_stride_requests > 1;
-}
-
-/* The function calculates how much pages will be read in
- * [off, off + length], in such stride IO area,
- * stride_offset = st_off, stride_length = st_len,
- * stride_pages = st_pgs
- *
- * |------------------|*****|------------------|*****|------------|*****|....
- * st_off
- * |--- st_pgs ---|
- * |----- st_len -----|
- *
- * How many pages it should read in such pattern
- * |-------------------------------------------------------------|
- * off
- * |<------ length ------->|
- *
- * = |<----->| + |-------------------------------------| + |---|
- * start_left st_pgs * i end_left
- */
-static unsigned long
-stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
- unsigned long off, unsigned long length)
-{
- __u64 start = off > st_off ? off - st_off : 0;
- __u64 end = off + length > st_off ? off + length - st_off : 0;
- unsigned long start_left = 0;
- unsigned long end_left = 0;
- unsigned long pg_count;
-
- if (st_len == 0 || length == 0 || end == 0)
- return length;
-
- start_left = do_div(start, st_len);
- if (start_left < st_pgs)
- start_left = st_pgs - start_left;
- else
- start_left = 0;
-
- end_left = do_div(end, st_len);
- if (end_left > st_pgs)
- end_left = st_pgs;
-
- CDEBUG(D_READA, "start %llu, end %llu start_left %lu end_left %lu\n",
- start, end, start_left, end_left);
-
- if (start == end)
- pg_count = end_left - (st_pgs - start_left);
- else
- pg_count = start_left + st_pgs * (end - start - 1) + end_left;
-
- CDEBUG(D_READA,
- "st_off %lu, st_len %lu st_pgs %lu off %lu length %lu pgcount %lu\n",
- st_off, st_len, st_pgs, off, length, pg_count);
-
- return pg_count;
-}
-
-static int ria_page_count(struct ra_io_arg *ria)
-{
- __u64 length = ria->ria_end >= ria->ria_start ?
- ria->ria_end - ria->ria_start + 1 : 0;
-
- return stride_pg_count(ria->ria_stoff, ria->ria_length,
- ria->ria_pages, ria->ria_start,
- length);
-}
-
-static unsigned long ras_align(struct ll_readahead_state *ras,
- unsigned long index,
- unsigned long *remainder)
-{
- unsigned long rem = index % ras->ras_rpc_size;
-
- if (remainder)
- *remainder = rem;
- return index - rem;
-}
-
-/*Check whether the index is in the defined ra-window */
-static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
-{
- /* If ria_length == ria_pages, it means non-stride I/O mode,
- * idx should always inside read-ahead window in this case
- * For stride I/O mode, just check whether the idx is inside
- * the ria_pages.
- */
- return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
- (idx >= ria->ria_stoff && (idx - ria->ria_stoff) %
- ria->ria_length < ria->ria_pages);
-}
-
-static unsigned long
-ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, struct ll_readahead_state *ras,
- struct ra_io_arg *ria)
-{
- struct cl_read_ahead ra = { 0 };
- unsigned long ra_end = 0;
- bool stride_ria;
- pgoff_t page_idx;
- int rc;
-
- LASSERT(ria);
- RIA_DEBUG(ria);
-
- stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
- for (page_idx = ria->ria_start;
- page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) {
- if (ras_inside_ra_window(page_idx, ria)) {
- if (!ra.cra_end || ra.cra_end < page_idx) {
- unsigned long end;
-
- cl_read_ahead_release(env, &ra);
-
- rc = cl_io_read_ahead(env, io, page_idx, &ra);
- if (rc < 0)
- break;
-
- CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n",
- page_idx, ra.cra_end, ra.cra_rpc_size);
- LASSERTF(ra.cra_end >= page_idx,
- "object: %p, indcies %lu / %lu\n",
- io->ci_obj, ra.cra_end, page_idx);
- /*
- * update read ahead RPC size.
- * NB: it's racy but doesn't matter
- */
- if (ras->ras_rpc_size > ra.cra_rpc_size &&
- ra.cra_rpc_size > 0)
- ras->ras_rpc_size = ra.cra_rpc_size;
- /* trim it to align with optimal RPC size */
- end = ras_align(ras, ria->ria_end + 1, NULL);
- if (end > 0 && !ria->ria_eof)
- ria->ria_end = end - 1;
- if (ria->ria_end < ria->ria_end_min)
- ria->ria_end = ria->ria_end_min;
- if (ria->ria_end > ra.cra_end)
- ria->ria_end = ra.cra_end;
- }
-
- /* If the page is inside the read-ahead window */
- rc = ll_read_ahead_page(env, io, queue, page_idx);
- if (rc < 0)
- break;
-
- ra_end = page_idx;
- if (!rc)
- ria->ria_reserved--;
- } else if (stride_ria) {
- /* If it is not in the read-ahead window, and it is
- * read-ahead mode, then check whether it should skip
- * the stride gap
- */
- pgoff_t offset;
- /* FIXME: This assertion only is valid when it is for
- * forward read-ahead, it will be fixed when backward
- * read-ahead is implemented
- */
- LASSERTF(page_idx >= ria->ria_stoff,
- "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
- page_idx,
- ria->ria_start, ria->ria_end, ria->ria_stoff,
- ria->ria_length, ria->ria_pages);
- offset = page_idx - ria->ria_stoff;
- offset = offset % (ria->ria_length);
- if (offset > ria->ria_pages) {
- page_idx += ria->ria_length - offset;
- CDEBUG(D_READA, "i %lu skip %lu\n", page_idx,
- ria->ria_length - offset);
- continue;
- }
- }
- }
- cl_read_ahead_release(env, &ra);
-
- return ra_end;
-}
-
-static int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue,
- struct ll_readahead_state *ras, bool hit)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct ll_thread_info *lti = ll_env_info(env);
- struct cl_attr *attr = vvp_env_thread_attr(env);
- unsigned long len, mlen = 0;
- pgoff_t ra_end, start = 0, end = 0;
- struct inode *inode;
- struct ra_io_arg *ria = &lti->lti_ria;
- struct cl_object *clob;
- int ret = 0;
- __u64 kms;
-
- clob = io->ci_obj;
- inode = vvp_object_inode(clob);
-
- memset(ria, 0, sizeof(*ria));
-
- cl_object_attr_lock(clob);
- ret = cl_object_attr_get(env, clob, attr);
- cl_object_attr_unlock(clob);
-
- if (ret != 0)
- return ret;
- kms = attr->cat_kms;
- if (kms == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
- return 0;
- }
-
- spin_lock(&ras->ras_lock);
-
- /**
- * Note: other thread might rollback the ras_next_readahead,
- * if it can not get the full size of prepared pages, see the
- * end of this function. For stride read ahead, it needs to
- * make sure the offset is no less than ras_stride_offset,
- * so that stride read ahead can work correctly.
- */
- if (stride_io_mode(ras))
- start = max(ras->ras_next_readahead, ras->ras_stride_offset);
- else
- start = ras->ras_next_readahead;
-
- if (ras->ras_window_len > 0)
- end = ras->ras_window_start + ras->ras_window_len - 1;
-
- /* Enlarge the RA window to encompass the full read */
- if (vio->vui_ra_valid &&
- end < vio->vui_ra_start + vio->vui_ra_count - 1)
- end = vio->vui_ra_start + vio->vui_ra_count - 1;
-
- if (end) {
- unsigned long end_index;
-
- /* Truncate RA window to end of file */
- end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT);
- if (end_index <= end) {
- end = end_index;
- ria->ria_eof = true;
- }
-
- ras->ras_next_readahead = max(end, end + 1);
- RAS_CDEBUG(ras);
- }
- ria->ria_start = start;
- ria->ria_end = end;
- /* If stride I/O mode is detected, get stride window*/
- if (stride_io_mode(ras)) {
- ria->ria_stoff = ras->ras_stride_offset;
- ria->ria_length = ras->ras_stride_length;
- ria->ria_pages = ras->ras_stride_pages;
- }
- spin_unlock(&ras->ras_lock);
-
- if (end == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
- return 0;
- }
- len = ria_page_count(ria);
- if (len == 0) {
- ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
- return 0;
- }
-
- CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
- PFID(lu_object_fid(&clob->co_lu)),
- ria->ria_start, ria->ria_end,
- vio->vui_ra_valid ? vio->vui_ra_start : 0,
- vio->vui_ra_valid ? vio->vui_ra_count : 0,
- hit);
-
- /* at least to extend the readahead window to cover current read */
- if (!hit && vio->vui_ra_valid &&
- vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) {
- unsigned long remainder;
-
- /* to the end of current read window. */
- mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start;
- /* trim to RPC boundary */
- ras_align(ras, ria->ria_start, &remainder);
- mlen = min(mlen, ras->ras_rpc_size - remainder);
- ria->ria_end_min = ria->ria_start + mlen;
- }
-
- ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
- if (ria->ria_reserved < len)
- ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
-
- CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
- ria->ria_reserved, len, mlen,
- atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
- ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
-
- ra_end = ll_read_ahead_pages(env, io, queue, ras, ria);
-
- if (ria->ria_reserved)
- ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved);
-
- if (ra_end == end && ra_end == (kms >> PAGE_SHIFT))
- ll_ra_stats_inc(inode, RA_STAT_EOF);
-
- /* if we didn't get to the end of the region we reserved from
- * the ras we need to go back and update the ras so that the
- * next read-ahead tries from where we left off. we only do so
- * if the region we failed to issue read-ahead on is still ahead
- * of the app and behind the next index to start read-ahead from
- */
- CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n",
- ra_end, end, ria->ria_end, ret);
-
- if (ra_end > 0 && ra_end != end) {
- ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
- spin_lock(&ras->ras_lock);
- if (ra_end <= ras->ras_next_readahead &&
- index_in_window(ra_end, ras->ras_window_start, 0,
- ras->ras_window_len)) {
- ras->ras_next_readahead = ra_end + 1;
- RAS_CDEBUG(ras);
- }
- spin_unlock(&ras->ras_lock);
- }
-
- return ret;
-}
-
-static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras,
- unsigned long index)
-{
- ras->ras_window_start = ras_align(ras, index, NULL);
-}
-
-/* called with the ras_lock held or from places where it doesn't matter */
-static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
- unsigned long index)
-{
- ras->ras_last_readpage = index;
- ras->ras_consecutive_requests = 0;
- ras->ras_consecutive_pages = 0;
- ras->ras_window_len = 0;
- ras_set_start(inode, ras, index);
- ras->ras_next_readahead = max(ras->ras_window_start, index + 1);
-
- RAS_CDEBUG(ras);
-}
-
-/* called with the ras_lock held or from places where it doesn't matter */
-static void ras_stride_reset(struct ll_readahead_state *ras)
-{
- ras->ras_consecutive_stride_requests = 0;
- ras->ras_stride_length = 0;
- ras->ras_stride_pages = 0;
- RAS_CDEBUG(ras);
-}
-
-void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
-{
- spin_lock_init(&ras->ras_lock);
- ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES;
- ras_reset(inode, ras, 0);
- ras->ras_requests = 0;
-}
-
-/*
- * Check whether the read request is in the stride window.
- * If it is in the stride window, return 1, otherwise return 0.
- */
-static int index_in_stride_window(struct ll_readahead_state *ras,
- unsigned long index)
-{
- unsigned long stride_gap;
-
- if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0 ||
- ras->ras_stride_pages == ras->ras_stride_length)
- return 0;
-
- stride_gap = index - ras->ras_last_readpage - 1;
-
- /* If it is contiguous read */
- if (stride_gap == 0)
- return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-
- /* Otherwise check the stride by itself */
- return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
- ras->ras_consecutive_pages == ras->ras_stride_pages;
-}
-
-static void ras_update_stride_detector(struct ll_readahead_state *ras,
- unsigned long index)
-{
- unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-
- if ((stride_gap != 0 || ras->ras_consecutive_stride_requests == 0) &&
- !stride_io_mode(ras)) {
- ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = ras->ras_consecutive_pages +
- stride_gap;
- }
- LASSERT(ras->ras_request_index == 0);
- LASSERT(ras->ras_consecutive_stride_requests == 0);
-
- if (index <= ras->ras_last_readpage) {
- /*Reset stride window for forward read*/
- ras_stride_reset(ras);
- return;
- }
-
- ras->ras_stride_pages = ras->ras_consecutive_pages;
- ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
-
- RAS_CDEBUG(ras);
-}
-
-/* Stride Read-ahead window will be increased inc_len according to
- * stride I/O pattern
- */
-static void ras_stride_increase_window(struct ll_readahead_state *ras,
- struct ll_ra_info *ra,
- unsigned long inc_len)
-{
- unsigned long left, step, window_len;
- unsigned long stride_len;
-
- LASSERT(ras->ras_stride_length > 0);
- LASSERTF(ras->ras_window_start + ras->ras_window_len >=
- ras->ras_stride_offset,
- "window_start %lu, window_len %lu stride_offset %lu\n",
- ras->ras_window_start,
- ras->ras_window_len, ras->ras_stride_offset);
-
- stride_len = ras->ras_window_start + ras->ras_window_len -
- ras->ras_stride_offset;
-
- left = stride_len % ras->ras_stride_length;
- window_len = ras->ras_window_len - left;
-
- if (left < ras->ras_stride_pages)
- left += inc_len;
- else
- left = ras->ras_stride_pages + inc_len;
-
- LASSERT(ras->ras_stride_pages != 0);
-
- step = left / ras->ras_stride_pages;
- left %= ras->ras_stride_pages;
-
- window_len += step * ras->ras_stride_length + left;
-
- if (stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
- ras->ras_stride_pages, ras->ras_stride_offset,
- window_len) <= ra->ra_max_pages_per_file)
- ras->ras_window_len = window_len;
-
- RAS_CDEBUG(ras);
-}
-
-static void ras_increase_window(struct inode *inode,
- struct ll_readahead_state *ras,
- struct ll_ra_info *ra)
-{
- /* The stretch of ra-window should be aligned with max rpc_size
- * but current clio architecture does not support retrieve such
- * information from lower layer. FIXME later
- */
- if (stride_io_mode(ras)) {
- ras_stride_increase_window(ras, ra, ras->ras_rpc_size);
- } else {
- unsigned long wlen;
-
- wlen = min(ras->ras_window_len + ras->ras_rpc_size,
- ra->ra_max_pages_per_file);
- ras->ras_window_len = ras_align(ras, wlen, NULL);
- }
-}
-
-static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
- struct ll_readahead_state *ras, unsigned long index,
- enum ras_update_flags flags)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- int zero = 0, stride_detect = 0, ra_miss = 0;
- bool hit = flags & LL_RAS_HIT;
-
- spin_lock(&ras->ras_lock);
-
- if (!hit)
- CDEBUG(D_READA, DFID " pages at %lu miss.\n",
- PFID(ll_inode2fid(inode)), index);
-
- ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS);
-
- /* reset the read-ahead window in two cases. First when the app seeks
- * or reads to some other part of the file. Secondly if we get a
- * read-ahead miss that we think we've previously issued. This can
- * be a symptom of there being so many read-ahead pages that the VM is
- * reclaiming it before we get to it.
- */
- if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
- zero = 1;
- ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE);
- } else if (!hit && ras->ras_window_len &&
- index < ras->ras_next_readahead &&
- index_in_window(index, ras->ras_window_start, 0,
- ras->ras_window_len)) {
- ra_miss = 1;
- ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW);
- }
-
- /* On the second access to a file smaller than the tunable
- * ra_max_read_ahead_whole_pages trigger RA on all pages in the
- * file up to ra_max_pages_per_file. This is simply a best effort
- * and only occurs once per open file. Normal RA behavior is reverted
- * to for subsequent IO. The mmap case does not increment
- * ras_requests and thus can never trigger this behavior.
- */
- if (ras->ras_requests >= 2 && !ras->ras_request_index) {
- __u64 kms_pages;
-
- kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
- PAGE_SHIFT;
-
- CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
- ra->ra_max_read_ahead_whole_pages,
- ra->ra_max_pages_per_file);
-
- if (kms_pages &&
- kms_pages <= ra->ra_max_read_ahead_whole_pages) {
- ras->ras_window_start = 0;
- ras->ras_next_readahead = index + 1;
- ras->ras_window_len = min(ra->ra_max_pages_per_file,
- ra->ra_max_read_ahead_whole_pages);
- goto out_unlock;
- }
- }
- if (zero) {
-		/* check whether it is in stride I/O mode */
- if (!index_in_stride_window(ras, index)) {
- if (ras->ras_consecutive_stride_requests == 0 &&
- ras->ras_request_index == 0) {
- ras_update_stride_detector(ras, index);
- ras->ras_consecutive_stride_requests++;
- } else {
- ras_stride_reset(ras);
- }
- ras_reset(inode, ras, index);
- ras->ras_consecutive_pages++;
- goto out_unlock;
- } else {
- ras->ras_consecutive_pages = 0;
- ras->ras_consecutive_requests = 0;
- if (++ras->ras_consecutive_stride_requests > 1)
- stride_detect = 1;
- RAS_CDEBUG(ras);
- }
- } else {
- if (ra_miss) {
- if (index_in_stride_window(ras, index) &&
- stride_io_mode(ras)) {
- if (index != ras->ras_last_readpage + 1)
- ras->ras_consecutive_pages = 0;
- ras_reset(inode, ras, index);
-
- /* If stride-RA hit cache miss, the stride
- * detector will not be reset to avoid the
- * overhead of redetecting read-ahead mode,
- * but on the condition that the stride window
-				 * is still intersecting with the normal sequential
- * read-ahead window.
- */
- if (ras->ras_window_start <
- ras->ras_stride_offset)
- ras_stride_reset(ras);
- RAS_CDEBUG(ras);
- } else {
- /* Reset both stride window and normal RA
- * window
- */
- ras_reset(inode, ras, index);
- ras->ras_consecutive_pages++;
- ras_stride_reset(ras);
- goto out_unlock;
- }
- } else if (stride_io_mode(ras)) {
- /* If this is contiguous read but in stride I/O mode
- * currently, check whether stride step still is valid,
- * if invalid, it will reset the stride ra window
- */
- if (!index_in_stride_window(ras, index)) {
- /* Shrink stride read-ahead window to be zero */
- ras_stride_reset(ras);
- ras->ras_window_len = 0;
- ras->ras_next_readahead = index;
- }
- }
- }
- ras->ras_consecutive_pages++;
- ras->ras_last_readpage = index;
- ras_set_start(inode, ras, index);
-
- if (stride_io_mode(ras)) {
- /* Since stride readahead is sensitive to the offset
- * of read-ahead, so we use original offset here,
- * instead of ras_window_start, which is RPC aligned
- */
- ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- ras->ras_window_start = max(ras->ras_stride_offset,
- ras->ras_window_start);
- } else {
- if (ras->ras_next_readahead < ras->ras_window_start)
- ras->ras_next_readahead = ras->ras_window_start;
- if (!hit)
- ras->ras_next_readahead = index + 1;
- }
- RAS_CDEBUG(ras);
-
- /* Trigger RA in the mmap case where ras_consecutive_requests
- * is not incremented and thus can't be used to trigger RA
- */
- if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) {
- ras_increase_window(inode, ras, ra);
- /*
- * reset consecutive pages so that the readahead window can
- * grow gradually.
- */
- ras->ras_consecutive_pages = 0;
- goto out_unlock;
- }
-
-	/* Initially reset the stride window offset to next_readahead */
- if (ras->ras_consecutive_stride_requests == 2 && stride_detect) {
- /**
- * Once stride IO mode is detected, next_readahead should be
- * reset to make sure next_readahead > stride offset
- */
- ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- ras->ras_stride_offset = index;
- ras->ras_window_start = max(index, ras->ras_window_start);
- }
-
- /* The initial ras_window_len is set to the request size. To avoid
- * uselessly reading and discarding pages for random IO the window is
- * only increased once per consecutive request received.
- */
- if ((ras->ras_consecutive_requests > 1 || stride_detect) &&
- !ras->ras_request_index)
- ras_increase_window(inode, ras, ra);
-out_unlock:
- RAS_CDEBUG(ras);
- ras->ras_request_index++;
- spin_unlock(&ras->ras_lock);
-}
-
-int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
-{
- struct inode *inode = vmpage->mapping->host;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- struct cl_object *clob;
- bool redirtied = false;
- bool unlocked = false;
- int result;
- u16 refcheck;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
-
- LASSERT(ll_i2dtexp(inode));
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env)) {
- result = PTR_ERR(env);
- goto out;
- }
-
- clob = ll_i2info(inode)->lli_clob;
- LASSERT(clob);
-
- io = vvp_env_thread_io(env);
- io->ci_obj = clob;
- io->ci_ignore_layout = 1;
- result = cl_io_init(env, io, CIT_MISC, clob);
- if (result == 0) {
- page = cl_page_find(env, clob, vmpage->index,
- vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- lu_ref_add(&page->cp_reference, "writepage",
- current);
- cl_page_assume(env, io, page);
- result = cl_page_flush(env, io, page);
- if (result != 0) {
- /*
- * Re-dirty page on error so it retries write,
- * but not in case when IO has actually
- * occurred and completed with an error.
- */
- if (!PageError(vmpage)) {
- redirty_page_for_writepage(wbc, vmpage);
- result = 0;
- redirtied = true;
- }
- }
- cl_page_disown(env, io, page);
- unlocked = true;
- lu_ref_del(&page->cp_reference,
- "writepage", current);
- cl_page_put(env, page);
- } else {
- result = PTR_ERR(page);
- }
- }
- cl_io_fini(env, io);
-
- if (redirtied && wbc->sync_mode == WB_SYNC_ALL) {
- loff_t offset = cl_offset(clob, vmpage->index);
-
- /* Flush page failed because the extent is being written out.
- * Wait for the write of extent to be finished to avoid
- * breaking kernel which assumes ->writepage should mark
- * PageWriteback or clean the page.
- */
- result = cl_sync_file_range(inode, offset,
- offset + PAGE_SIZE - 1,
- CL_FSYNC_LOCAL, 1);
- if (result > 0) {
- /* actually we may have written more than one page.
- * decreasing this page because the caller will count
- * it.
- */
- wbc->nr_to_write -= result - 1;
- result = 0;
- }
- }
-
- cl_env_put(env, &refcheck);
- goto out;
-
-out:
- if (result < 0) {
- if (!lli->lli_async_rc)
- lli->lli_async_rc = result;
- SetPageError(vmpage);
- if (!unlocked)
- unlock_page(vmpage);
- }
- return result;
-}
-
-int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
- struct inode *inode = mapping->host;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- loff_t start;
- loff_t end;
- enum cl_fsync_mode mode;
- int range_whole = 0;
- int result;
- int ignore_layout = 0;
-
- if (wbc->range_cyclic) {
- start = mapping->writeback_index << PAGE_SHIFT;
- end = OBD_OBJECT_EOF;
- } else {
- start = wbc->range_start;
- end = wbc->range_end;
- if (end == LLONG_MAX) {
- end = OBD_OBJECT_EOF;
- range_whole = start == 0;
- }
- }
-
- mode = CL_FSYNC_NONE;
- if (wbc->sync_mode == WB_SYNC_ALL)
- mode = CL_FSYNC_LOCAL;
-
- if (sbi->ll_umounting)
- /* if the mountpoint is being umounted, all pages have to be
- * evicted to avoid hitting LBUG when truncate_inode_pages()
- * is called later on.
- */
- ignore_layout = 1;
-
- if (!ll_i2info(inode)->lli_clob)
- return 0;
-
- result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
- if (result > 0) {
- wbc->nr_to_write -= result;
- result = 0;
- }
-
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
- if (end == OBD_OBJECT_EOF)
- mapping->writeback_index = 0;
- else
- mapping->writeback_index = (end >> PAGE_SHIFT) + 1;
- }
- return result;
-}
-
-struct ll_cl_context *ll_cl_find(struct file *file)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc;
- struct ll_cl_context *found = NULL;
-
- read_lock(&fd->fd_lock);
- list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) {
- if (lcc->lcc_cookie == current) {
- found = lcc;
- break;
- }
- }
- read_unlock(&fd->fd_lock);
-
- return found;
-}
-
-void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
-
- memset(lcc, 0, sizeof(*lcc));
- INIT_LIST_HEAD(&lcc->lcc_list);
- lcc->lcc_cookie = current;
- lcc->lcc_env = env;
- lcc->lcc_io = io;
-
- write_lock(&fd->fd_lock);
- list_add(&lcc->lcc_list, &fd->fd_lccs);
- write_unlock(&fd->fd_lock);
-}
-
-void ll_cl_remove(struct file *file, const struct lu_env *env)
-{
- struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx;
-
- write_lock(&fd->fd_lock);
- list_del_init(&lcc->lcc_list);
- write_unlock(&fd->fd_lock);
-}
-
-static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct inode *inode = vvp_object_inode(page->cp_obj);
- struct ll_file_data *fd = vvp_env_io(env)->vui_fd;
- struct ll_readahead_state *ras = &fd->fd_ras;
- struct cl_2queue *queue = &io->ci_queue;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct vvp_page *vpg;
- bool uptodate;
- int rc = 0;
-
- vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
- uptodate = vpg->vpg_defer_uptodate;
-
- if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
- sbi->ll_ra_info.ra_max_pages > 0) {
- struct vvp_io *vio = vvp_env_io(env);
- enum ras_update_flags flags = 0;
-
- if (uptodate)
- flags |= LL_RAS_HIT;
- if (!vio->vui_ra_valid)
- flags |= LL_RAS_MMAP;
- ras_update(sbi, inode, ras, vvp_index(vpg), flags);
- }
-
- cl_2queue_init(queue);
- if (uptodate) {
- vpg->vpg_ra_used = 1;
- cl_page_export(env, page, 1);
- cl_page_disown(env, io, page);
- } else {
- cl_page_list_add(&queue->c2_qin, page);
- }
-
- if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
- sbi->ll_ra_info.ra_max_pages > 0) {
- int rc2;
-
- rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
- uptodate);
- CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n",
- PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg));
- }
-
- if (queue->c2_qin.pl_nr > 0)
- rc = cl_io_submit_rw(env, io, CRT_READ, queue);
-
- /*
- * Unlock unsent pages in case of error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return rc;
-}
-
-int ll_readpage(struct file *file, struct page *vmpage)
-{
- struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob;
- struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
- struct cl_page *page;
- int result;
-
- lcc = ll_cl_find(file);
- if (!lcc) {
- unlock_page(vmpage);
- return -EIO;
- }
-
- env = lcc->lcc_env;
- io = lcc->lcc_io;
- LASSERT(io->ci_state == CIS_IO_GOING);
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (!IS_ERR(page)) {
- LASSERT(page->cp_type == CPT_CACHEABLE);
- if (likely(!PageUptodate(vmpage))) {
- cl_page_assume(env, io, page);
- result = ll_io_read_page(env, io, page);
- } else {
- /* Page from a non-object file. */
- unlock_page(vmpage);
- result = 0;
- }
- cl_page_put(env, page);
- } else {
- unlock_page(vmpage);
- result = PTR_ERR(page);
- }
- return result;
-}
-
-int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page, enum cl_req_type crt)
-{
- struct cl_2queue *queue;
- int result;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- queue = &io->ci_queue;
- cl_2queue_init_page(queue, page);
-
- result = cl_io_submit_sync(env, io, crt, queue, 0);
- LASSERT(cl_page_is_owned(page, io));
-
- if (crt == CRT_READ)
- /*
- * in CRT_WRITE case page is left locked even in case of
- * error.
- */
- cl_page_list_disown(env, io, &queue->c2_qin);
- cl_2queue_fini(env, queue);
-
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
deleted file mode 100644
index 722e5ea1af5f..000000000000
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ /dev/null
@@ -1,641 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/lustre/llite/rw26.c
- *
- * Lustre Lite I/O page cache routines for the 2.5/2.6 kernel version
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
-
-#include <linux/migrate.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/mpage.h>
-#include <linux/writeback.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-/**
- * Implements Linux VM address_space::invalidatepage() method. This method is
- * called when the page is truncated from a file, either as a result of
- * explicit truncate, or when inode is removed from memory (as a result of
- * final iput(), umount, or memory pressure induced icache shrinking).
- *
- * [0, offset] bytes of the page remain valid (this is for a case of not-page
- * aligned truncate). Lustre leaves partially truncated page in the cache,
- * relying on struct inode::i_size to limit further accesses.
- */
-static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
- unsigned int length)
-{
- struct inode *inode;
- struct lu_env *env;
- struct cl_page *page;
- struct cl_object *obj;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
-
- /*
- * It is safe to not check anything in invalidatepage/releasepage
- * below because they are run with page locked and all our io is
- * happening with locked page too
- */
- if (offset == 0 && length == PAGE_SIZE) {
- /* See the comment in ll_releasepage() */
- env = cl_env_percpu_get();
- LASSERT(!IS_ERR(env));
- inode = vmpage->mapping->host;
- obj = ll_i2info(inode)->lli_clob;
- if (obj) {
- page = cl_vmpage_page(vmpage, obj);
- if (page) {
- cl_page_delete(env, page);
- cl_page_put(env, page);
- }
- } else {
- LASSERT(vmpage->private == 0);
- }
- cl_env_percpu_put(env);
- }
-}
-
-static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
-{
- struct lu_env *env;
- struct cl_object *obj;
- struct cl_page *page;
- struct address_space *mapping;
- int result = 0;
-
- LASSERT(PageLocked(vmpage));
- if (PageWriteback(vmpage) || PageDirty(vmpage))
- return 0;
-
- mapping = vmpage->mapping;
- if (!mapping)
- return 1;
-
- obj = ll_i2info(mapping->host)->lli_clob;
- if (!obj)
- return 1;
-
- /* 1 for caller, 1 for cl_page and 1 for page cache */
- if (page_count(vmpage) > 3)
- return 0;
-
- page = cl_vmpage_page(vmpage, obj);
- if (!page)
- return 1;
-
- env = cl_env_percpu_get();
- LASSERT(!IS_ERR(env));
-
- if (!cl_page_in_use(page)) {
- result = 1;
- cl_page_delete(env, page);
- }
-
- /* To use percpu env array, the call path can not be rescheduled;
-	 * otherwise the percpu array will be messed up if ll_releasepage() is
-	 * called again on the same CPU.
- *
- * If this page holds the last refc of cl_object, the following
- * call path may cause reschedule:
- * cl_page_put -> cl_page_free -> cl_object_put ->
- * lu_object_put -> lu_object_free -> lov_delete_raid0.
- *
- * However, the kernel can't get rid of this inode until all pages have
- * been cleaned up. Now that we hold page lock here, it's pretty safe
- * that we won't get into object delete path.
- */
- LASSERT(cl_object_refc(obj) > 1);
- cl_page_put(env, page);
-
- cl_env_percpu_put(env);
- return result;
-}
-
-#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL)
-
-/* ll_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- */
-static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
-{
- int i;
-
- for (i = 0; i < npages; i++) {
- if (do_dirty)
- set_page_dirty_lock(pages[i]);
- put_page(pages[i]);
- }
- kvfree(pages);
-}
-
-ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct ll_dio_pages *pv)
-{
- struct cl_page *clp;
- struct cl_2queue *queue;
- struct cl_object *obj = io->ci_obj;
- int i;
- ssize_t rc = 0;
- loff_t file_offset = pv->ldp_start_offset;
- size_t size = pv->ldp_size;
- int page_count = pv->ldp_nr;
- struct page **pages = pv->ldp_pages;
- size_t page_size = cl_page_size(obj);
- bool do_io;
- int io_pages = 0;
-
- queue = &io->ci_queue;
- cl_2queue_init(queue);
- for (i = 0; i < page_count; i++) {
- if (pv->ldp_offsets)
- file_offset = pv->ldp_offsets[i];
-
- LASSERT(!(file_offset & (page_size - 1)));
- clp = cl_page_find(env, obj, cl_index(obj, file_offset),
- pv->ldp_pages[i], CPT_TRANSIENT);
- if (IS_ERR(clp)) {
- rc = PTR_ERR(clp);
- break;
- }
-
- rc = cl_page_own(env, io, clp);
- if (rc) {
- LASSERT(clp->cp_state == CPS_FREEING);
- cl_page_put(env, clp);
- break;
- }
-
- do_io = true;
-
- /* check the page type: if the page is a host page, then do
- * write directly
- */
- if (clp->cp_type == CPT_CACHEABLE) {
- struct page *vmpage = cl_page_vmpage(clp);
- struct page *src_page;
- struct page *dst_page;
- void *src;
- void *dst;
-
- src_page = (rw == WRITE) ? pages[i] : vmpage;
- dst_page = (rw == WRITE) ? vmpage : pages[i];
-
- src = kmap_atomic(src_page);
- dst = kmap_atomic(dst_page);
- memcpy(dst, src, min(page_size, size));
- kunmap_atomic(dst);
- kunmap_atomic(src);
-
- /* make sure page will be added to the transfer by
- * cl_io_submit()->...->vvp_page_prep_write().
- */
- if (rw == WRITE)
- set_page_dirty(vmpage);
-
- if (rw == READ) {
- /* do not issue the page for read, since it
- * may reread a ra page which has NOT uptodate
- * bit set.
- */
- cl_page_disown(env, io, clp);
- do_io = false;
- }
- }
-
- if (likely(do_io)) {
- /*
- * Add a page to the incoming page list of 2-queue.
- */
- cl_page_list_add(&queue->c2_qin, clp);
-
- /*
- * Set page clip to tell transfer formation engine
- * that page has to be sent even if it is beyond KMS.
- */
- cl_page_clip(env, clp, 0, min(size, page_size));
-
- ++io_pages;
- }
-
- /* drop the reference count for cl_page_find */
- cl_page_put(env, clp);
- size -= page_size;
- file_offset += page_size;
- }
-
- if (rc == 0 && io_pages) {
- rc = cl_io_submit_sync(env, io,
- rw == READ ? CRT_READ : CRT_WRITE,
- queue, 0);
- }
- if (rc == 0)
- rc = pv->ldp_size;
-
- cl_2queue_discard(env, io, queue);
- cl_2queue_disown(env, io, queue);
- cl_2queue_fini(env, queue);
- return rc;
-}
-EXPORT_SYMBOL(ll_direct_rw_pages);
-
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct address_space *mapping,
- size_t size, loff_t file_offset,
- struct page **pages, int page_count)
-{
- struct ll_dio_pages pvec = {
- .ldp_pages = pages,
- .ldp_nr = page_count,
- .ldp_size = size,
- .ldp_offsets = NULL,
- .ldp_start_offset = file_offset
- };
-
- return ll_direct_rw_pages(env, io, rw, inode, &pvec);
-}
-
-/* This is the maximum size of a single O_DIRECT request, based on the
- * kmalloc limit. We need to fit all of the brw_page structs, each one
- * representing PAGE_SIZE worth of user data, into a single buffer, and
- * then truncate this to be a full-sized RPC. For 4kB PAGE_SIZE this is
- * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
- */
-#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
- PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct ll_cl_context *lcc;
- const struct lu_env *env;
- struct cl_io *io;
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- loff_t file_offset = iocb->ki_pos;
- ssize_t count = iov_iter_count(iter);
- ssize_t tot_bytes = 0, result = 0;
- long size = MAX_DIO_SIZE;
-
- /* Check EOF by ourselves */
- if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode))
- return 0;
-
- /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
- if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
- return -EINVAL;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
- PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
- file_offset, file_offset, count >> PAGE_SHIFT,
- MAX_DIO_SIZE >> PAGE_SHIFT);
-
- /* Check that all user buffers are aligned as well */
- if (iov_iter_alignment(iter) & ~PAGE_MASK)
- return -EINVAL;
-
- lcc = ll_cl_find(file);
- if (!lcc)
- return -EIO;
-
- env = lcc->lcc_env;
- LASSERT(!IS_ERR(env));
- io = lcc->lcc_io;
- LASSERT(io);
-
- while (iov_iter_count(iter)) {
- struct page **pages;
- size_t offs;
-
- count = min_t(size_t, iov_iter_count(iter), size);
- if (iov_iter_rw(iter) == READ) {
- if (file_offset >= i_size_read(inode))
- break;
- if (file_offset + count > i_size_read(inode))
- count = i_size_read(inode) - file_offset;
- }
-
- result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
- if (likely(result > 0)) {
- int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
-
- result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter),
- inode, file->f_mapping,
- result, file_offset, pages,
- n);
- ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ);
- }
- if (unlikely(result <= 0)) {
- /* If we can't allocate a large enough buffer
- * for the request, shrink it to a smaller
- * PAGE_SIZE multiple and try again.
- * We should always be able to kmalloc for a
- * page worth of page pointers = 4MB on i386.
- */
- if (result == -ENOMEM &&
- size > (PAGE_SIZE / sizeof(*pages)) *
- PAGE_SIZE) {
- size = ((((size / 2) - 1) |
- ~PAGE_MASK) + 1) &
- PAGE_MASK;
- CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
- size);
- continue;
- }
-
- goto out;
- }
- iov_iter_advance(iter, result);
- tot_bytes += result;
- file_offset += result;
- }
-out:
- if (tot_bytes > 0) {
- struct vvp_io *vio = vvp_env_io(env);
-
- /* no commit async for direct IO */
- vio->u.write.vui_written += tot_bytes;
- }
-
- return tot_bytes ? tot_bytes : result;
-}
-
-/**
- * Prepare partially written-to page for a write.
- */
-static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
- struct cl_page *pg)
-{
- struct cl_attr *attr = vvp_env_thread_attr(env);
- struct cl_object *obj = io->ci_obj;
- struct vvp_page *vpg = cl_object_page_slice(obj, pg);
- loff_t offset = cl_offset(obj, vvp_index(vpg));
- int result;
-
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- cl_object_attr_unlock(obj);
- if (result == 0) {
- /*
-		 * If we are writing to a new page, no need to read old data.
- * The extent locking will have updated the KMS, and for our
- * purposes here we can treat it like i_size.
- */
- if (attr->cat_kms <= offset) {
- char *kaddr = kmap_atomic(vpg->vpg_page);
-
- memset(kaddr, 0, cl_page_size(obj));
- kunmap_atomic(kaddr);
- } else if (vpg->vpg_defer_uptodate) {
- vpg->vpg_ra_used = 1;
- } else {
- result = ll_page_sync_io(env, io, pg, CRT_READ);
- }
- }
- return result;
-}
-
-static int ll_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int flags,
- struct page **pagep, void **fsdata)
-{
- struct ll_cl_context *lcc;
- const struct lu_env *env = NULL;
- struct cl_io *io;
- struct cl_page *page = NULL;
- struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *vmpage = NULL;
- unsigned int from = pos & (PAGE_SIZE - 1);
- unsigned int to = from + len;
- int result = 0;
-
- CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
-
- lcc = ll_cl_find(file);
- if (!lcc) {
- io = NULL;
- result = -EIO;
- goto out;
- }
-
- env = lcc->lcc_env;
- io = lcc->lcc_io;
-
- /* To avoid deadlock, try to lock page first. */
- vmpage = grab_cache_page_nowait(mapping, index);
- if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_page_list *plist = &vio->u.write.vui_queue;
-
- /* if the page is already in dirty cache, we have to commit
- * the pages right now; otherwise, it may cause deadlock
-		 * because it holds the page lock of a dirty page and requests
- * more grants. It's okay for the dirty page to be the first
- * one in commit page list, though.
- */
- if (vmpage && plist->pl_nr > 0) {
- unlock_page(vmpage);
- put_page(vmpage);
- vmpage = NULL;
- }
-
- /* commit pages and then wait for page lock */
- result = vvp_io_write_commit(env, io);
- if (result < 0)
- goto out;
-
- if (!vmpage) {
- vmpage = grab_cache_page_write_begin(mapping, index,
- flags);
- if (!vmpage) {
- result = -ENOMEM;
- goto out;
- }
- }
- }
-
- page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- result = PTR_ERR(page);
- goto out;
- }
-
- lcc->lcc_page = page;
- lu_ref_add(&page->cp_reference, "cl_io", io);
-
- cl_page_assume(env, io, page);
- if (!PageUptodate(vmpage)) {
- /*
- * We're completely overwriting an existing page,
- * so _don't_ set it up to date until commit_write
- */
- if (from == 0 && to == PAGE_SIZE) {
- CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
- POISON_PAGE(vmpage, 0x11);
- } else {
- /* TODO: can be optimized at OSC layer to check if it
- * is a lockless IO. In that case, it's not necessary
- * to read the data.
- */
- result = ll_prepare_partial_page(env, io, page);
- if (result == 0)
- SetPageUptodate(vmpage);
- }
- }
- if (result < 0)
- cl_page_unassume(env, io, page);
-out:
- if (result < 0) {
- if (vmpage) {
- unlock_page(vmpage);
- put_page(vmpage);
- }
- if (!IS_ERR_OR_NULL(page)) {
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- if (io)
- io->ci_result = result;
- } else {
- *pagep = vmpage;
- *fsdata = lcc;
- }
- return result;
-}
-
-static int ll_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned int len, unsigned int copied,
- struct page *vmpage, void *fsdata)
-{
- struct ll_cl_context *lcc = fsdata;
- const struct lu_env *env;
- struct cl_io *io;
- struct vvp_io *vio;
- struct cl_page *page;
- unsigned int from = pos & (PAGE_SIZE - 1);
- bool unplug = false;
- int result = 0;
-
- put_page(vmpage);
-
- env = lcc->lcc_env;
- page = lcc->lcc_page;
- io = lcc->lcc_io;
- vio = vvp_env_io(env);
-
- LASSERT(cl_page_is_owned(page, io));
- if (copied > 0) {
- struct cl_page_list *plist = &vio->u.write.vui_queue;
-
- lcc->lcc_page = NULL; /* page will be queued */
-
- /* Add it into write queue */
- cl_page_list_add(plist, page);
- if (plist->pl_nr == 1) /* first page */
- vio->u.write.vui_from = from;
- else
- LASSERT(from == 0);
- vio->u.write.vui_to = from + copied;
-
- /*
- * To address the deadlock in balance_dirty_pages() where
- * this dirty page may be written back in the same thread.
- */
- if (PageDirty(vmpage))
- unplug = true;
-
- /* We may have one full RPC, commit it soon */
- if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
- unplug = true;
-
- CL_PAGE_DEBUG(D_VFSTRACE, env, page,
- "queued page: %d.\n", plist->pl_nr);
- } else {
- cl_page_disown(env, io, page);
-
- lcc->lcc_page = NULL;
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
-
- /* page list is not contiguous now, commit it now */
- unplug = true;
- }
-
- if (unplug ||
- file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
- result = vvp_io_write_commit(env, io);
-
- if (result < 0)
- io->ci_result = result;
- return result >= 0 ? copied : result;
-}
-
-#ifdef CONFIG_MIGRATION
-static int ll_migratepage(struct address_space *mapping,
- struct page *newpage, struct page *page,
- enum migrate_mode mode
- )
-{
- /* Always fail page migration until we have a proper implementation */
- return -EIO;
-}
-#endif
-
-const struct address_space_operations ll_aops = {
- .readpage = ll_readpage,
- .direct_IO = ll_direct_IO_26,
- .writepage = ll_writepage,
- .writepages = ll_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
- .write_begin = ll_write_begin,
- .write_end = ll_write_end,
- .invalidatepage = ll_invalidatepage,
- .releasepage = (void *)ll_releasepage,
-#ifdef CONFIG_MIGRATION
- .migratepage = ll_migratepage,
-#endif
-};
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
deleted file mode 100644
index 155ce3cf6f60..000000000000
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-#define SA_OMITTED_ENTRY_MAX 8ULL
-
-enum se_stat {
- /** negative values are for error cases */
- SA_ENTRY_INIT = 0, /** init entry */
- SA_ENTRY_SUCC = 1, /** stat succeed */
- SA_ENTRY_INVA = 2, /** invalid entry */
-};
-
-/*
- * sa_entry is not refcounted: statahead thread allocates it and do async stat,
- * and in async stat callback ll_statahead_interpret() will add it into
- * sai_interim_entries, later statahead thread will call sa_handle_callback() to
- * instantiate entry and move it into sai_entries, and then only scanner process
- * can access and free it.
- */
-struct sa_entry {
- /* link into sai_interim_entries or sai_entries */
- struct list_head se_list;
- /* link into sai hash table locally */
- struct list_head se_hash;
- /* entry index in the sai */
- __u64 se_index;
- /* low layer ldlm lock handle */
- __u64 se_handle;
- /* entry status */
- enum se_stat se_state;
- /* entry size, contains name */
- int se_size;
- /* pointer to async getattr enqueue info */
- struct md_enqueue_info *se_minfo;
- /* pointer to the async getattr request */
- struct ptlrpc_request *se_req;
- /* pointer to the target inode */
- struct inode *se_inode;
- /* entry name */
- struct qstr se_qstr;
- /* entry fid */
- struct lu_fid se_fid;
-};
-
-static unsigned int sai_generation;
-static DEFINE_SPINLOCK(sai_generation_lock);
-
-/* sa_entry is ready to use */
-static inline int sa_ready(struct sa_entry *entry)
-{
- smp_rmb();
- return (entry->se_state != SA_ENTRY_INIT);
-}
-
-/* hash value to put in sai_cache */
-static inline int sa_hash(int val)
-{
- return val & LL_SA_CACHE_MASK;
-}
-
-/* hash entry into sai_cache */
-static inline void
-sa_rehash(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- int i = sa_hash(entry->se_qstr.hash);
-
- spin_lock(&sai->sai_cache_lock[i]);
- list_add_tail(&entry->se_hash, &sai->sai_cache[i]);
- spin_unlock(&sai->sai_cache_lock[i]);
-}
-
-/*
- * Remove entry from SA table.
- */
-static inline void
-sa_unhash(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- int i = sa_hash(entry->se_qstr.hash);
-
- spin_lock(&sai->sai_cache_lock[i]);
- list_del_init(&entry->se_hash);
- spin_unlock(&sai->sai_cache_lock[i]);
-}
-
-static inline int agl_should_run(struct ll_statahead_info *sai,
- struct inode *inode)
-{
- return (inode && S_ISREG(inode->i_mode) && sai->sai_agl_valid);
-}
-
-/* statahead window is full */
-static inline int sa_sent_full(struct ll_statahead_info *sai)
-{
- return atomic_read(&sai->sai_cache_count) >= sai->sai_max;
-}
-
-/* got async stat replies */
-static inline int sa_has_callback(struct ll_statahead_info *sai)
-{
- return !list_empty(&sai->sai_interim_entries);
-}
-
-static inline int agl_list_empty(struct ll_statahead_info *sai)
-{
- return list_empty(&sai->sai_agls);
-}
-
-/**
- * (1) hit ratio less than 80%
- * or
- * (2) consecutive miss more than 8
- * then means low hit.
- */
-static inline int sa_low_hit(struct ll_statahead_info *sai)
-{
- return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) ||
- (sai->sai_consecutive_miss > 8));
-}
-
-/*
- * if the given index is behind of statahead window more than
- * SA_OMITTED_ENTRY_MAX, then it is old.
- */
-static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
-{
- return ((__u64)sai->sai_max + index + SA_OMITTED_ENTRY_MAX <
- sai->sai_index);
-}
-
-/* allocate sa_entry and hash it to allow scanner process to find it */
-static struct sa_entry *
-sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
- const char *name, int len, const struct lu_fid *fid)
-{
- struct ll_inode_info *lli;
- struct sa_entry *entry;
- int entry_size;
- char *dname;
-
- entry_size = sizeof(struct sa_entry) + (len & ~3) + 4;
- entry = kzalloc(entry_size, GFP_NOFS);
- if (unlikely(!entry))
- return ERR_PTR(-ENOMEM);
-
- CDEBUG(D_READA, "alloc sa entry %.*s(%p) index %llu\n",
- len, name, entry, index);
-
- entry->se_index = index;
- entry->se_state = SA_ENTRY_INIT;
- entry->se_size = entry_size;
- dname = (char *)entry + sizeof(struct sa_entry);
- memcpy(dname, name, len);
- dname[len] = 0;
-
- entry->se_qstr.hash = full_name_hash(parent, name, len);
- entry->se_qstr.len = len;
- entry->se_qstr.name = dname;
- entry->se_fid = *fid;
-
- lli = ll_i2info(sai->sai_dentry->d_inode);
- spin_lock(&lli->lli_sa_lock);
- INIT_LIST_HEAD(&entry->se_list);
- sa_rehash(sai, entry);
- spin_unlock(&lli->lli_sa_lock);
-
- atomic_inc(&sai->sai_cache_count);
-
- return entry;
-}
-
-/* free sa_entry, which should have been unhashed and not in any list */
-static void sa_free(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
- entry->se_qstr.len, entry->se_qstr.name, entry,
- entry->se_index);
-
- LASSERT(list_empty(&entry->se_list));
- LASSERT(list_empty(&entry->se_hash));
-
- kfree(entry);
- atomic_dec(&sai->sai_cache_count);
-}
-
-/*
- * find sa_entry by name, used by directory scanner, lock is not needed because
- * only scanner can remove the entry from cache.
- */
-static struct sa_entry *
-sa_get(struct ll_statahead_info *sai, const struct qstr *qstr)
-{
- struct sa_entry *entry;
- int i = sa_hash(qstr->hash);
-
- list_for_each_entry(entry, &sai->sai_cache[i], se_hash) {
- if (entry->se_qstr.hash == qstr->hash &&
- entry->se_qstr.len == qstr->len &&
- memcmp(entry->se_qstr.name, qstr->name, qstr->len) == 0)
- return entry;
- }
- return NULL;
-}
-
-/* unhash and unlink sa_entry, and then free it */
-static inline void
-sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
-
- LASSERT(!list_empty(&entry->se_hash));
- LASSERT(!list_empty(&entry->se_list));
- LASSERT(sa_ready(entry));
-
- sa_unhash(sai, entry);
-
- spin_lock(&lli->lli_sa_lock);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
- if (entry->se_inode)
- iput(entry->se_inode);
-
- sa_free(sai, entry);
-}
-
-/* called by scanner after use, sa_entry will be killed */
-static void
-sa_put(struct ll_statahead_info *sai, struct sa_entry *entry, struct ll_inode_info *lli)
-{
- struct sa_entry *tmp, *next;
-
- if (entry && entry->se_state == SA_ENTRY_SUCC) {
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
-
- sai->sai_hit++;
- sai->sai_consecutive_miss = 0;
- sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
- } else {
- sai->sai_miss++;
- sai->sai_consecutive_miss++;
- }
-
- if (entry)
- sa_kill(sai, entry);
-
- /*
- * kill old completed entries, only scanner process does this, no need
- * to lock
- */
- list_for_each_entry_safe(tmp, next, &sai->sai_entries, se_list) {
- if (!is_omitted_entry(sai, tmp->se_index))
- break;
- sa_kill(sai, tmp);
- }
-
- spin_lock(&lli->lli_sa_lock);
- if (sai->sai_task)
- wake_up_process(sai->sai_task);
- spin_unlock(&lli->lli_sa_lock);
-
-}
-
-/*
- * update state and sort add entry to sai_entries by index, return true if
- * scanner is waiting on this entry.
- */
-static bool
-__sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
-{
- struct list_head *pos = &sai->sai_entries;
- __u64 index = entry->se_index;
- struct sa_entry *se;
-
- LASSERT(!sa_ready(entry));
- LASSERT(list_empty(&entry->se_list));
-
- list_for_each_entry_reverse(se, &sai->sai_entries, se_list) {
- if (se->se_index < entry->se_index) {
- pos = &se->se_list;
- break;
- }
- }
- list_add(&entry->se_list, pos);
- entry->se_state = ret < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC;
-
- return (index == sai->sai_index_wait);
-}
-
-/*
- * release resources used in async stat RPC, update entry state and wakeup if
- * scanner process it waiting on this entry.
- */
-static void
-sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
- struct md_enqueue_info *minfo = entry->se_minfo;
- struct ptlrpc_request *req = entry->se_req;
- bool wakeup;
-
- /* release resources used in RPC */
- if (minfo) {
- entry->se_minfo = NULL;
- ll_intent_release(&minfo->mi_it);
- iput(minfo->mi_dir);
- kfree(minfo);
- }
-
- if (req) {
- entry->se_req = NULL;
- ptlrpc_req_finished(req);
- }
-
- spin_lock(&lli->lli_sa_lock);
- wakeup = __sa_make_ready(sai, entry, ret);
- spin_unlock(&lli->lli_sa_lock);
-
- if (wakeup)
- wake_up(&sai->sai_waitq);
-}
-
-/* Insert inode into the list of sai_agls. */
-static void ll_agl_add(struct ll_statahead_info *sai,
- struct inode *inode, int index)
-{
- struct ll_inode_info *child = ll_i2info(inode);
- struct ll_inode_info *parent = ll_i2info(sai->sai_dentry->d_inode);
- int added = 0;
-
- spin_lock(&child->lli_agl_lock);
- if (child->lli_agl_index == 0) {
- child->lli_agl_index = index;
- spin_unlock(&child->lli_agl_lock);
-
- LASSERT(list_empty(&child->lli_agl_list));
-
- igrab(inode);
- spin_lock(&parent->lli_agl_lock);
- if (list_empty(&sai->sai_agls))
- added = 1;
- list_add_tail(&child->lli_agl_list, &sai->sai_agls);
- spin_unlock(&parent->lli_agl_lock);
- } else {
- spin_unlock(&child->lli_agl_lock);
- }
-
- if (added > 0)
- wake_up_process(sai->sai_agl_task);
-}
-
-/* allocate sai */
-static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dentry->d_inode);
- struct ll_statahead_info *sai;
- int i;
-
- sai = kzalloc(sizeof(*sai), GFP_NOFS);
- if (!sai)
- return NULL;
-
- sai->sai_dentry = dget(dentry);
- atomic_set(&sai->sai_refcount, 1);
-
- sai->sai_max = LL_SA_RPC_MIN;
- sai->sai_index = 1;
- init_waitqueue_head(&sai->sai_waitq);
-
- INIT_LIST_HEAD(&sai->sai_interim_entries);
- INIT_LIST_HEAD(&sai->sai_entries);
- INIT_LIST_HEAD(&sai->sai_agls);
-
- for (i = 0; i < LL_SA_CACHE_SIZE; i++) {
- INIT_LIST_HEAD(&sai->sai_cache[i]);
- spin_lock_init(&sai->sai_cache_lock[i]);
- }
- atomic_set(&sai->sai_cache_count, 0);
-
- spin_lock(&sai_generation_lock);
- lli->lli_sa_generation = ++sai_generation;
- if (unlikely(!sai_generation))
- lli->lli_sa_generation = ++sai_generation;
- spin_unlock(&sai_generation_lock);
-
- return sai;
-}
-
-/* free sai */
-static inline void ll_sai_free(struct ll_statahead_info *sai)
-{
- LASSERT(sai->sai_dentry);
- dput(sai->sai_dentry);
- kfree(sai);
-}
-
-/*
- * take refcount of sai if sai for @dir exists, which means statahead is on for
- * this directory.
- */
-static inline struct ll_statahead_info *ll_sai_get(struct inode *dir)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
-
- spin_lock(&lli->lli_sa_lock);
- sai = lli->lli_sai;
- if (sai)
- atomic_inc(&sai->sai_refcount);
- spin_unlock(&lli->lli_sa_lock);
-
- return sai;
-}
-
-/*
- * put sai refcount after use, if refcount reaches zero, free sai and sa_entries
- * attached to it.
- */
-static void ll_sai_put(struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
-
- if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) {
- struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
- struct sa_entry *entry, *next;
-
- lli->lli_sai = NULL;
- spin_unlock(&lli->lli_sa_lock);
-
- LASSERT(sai->sai_task == NULL);
- LASSERT(sai->sai_agl_task == NULL);
- LASSERT(sai->sai_sent == sai->sai_replied);
- LASSERT(!sa_has_callback(sai));
-
- list_for_each_entry_safe(entry, next, &sai->sai_entries,
- se_list)
- sa_kill(sai, entry);
-
- LASSERT(atomic_read(&sai->sai_cache_count) == 0);
- LASSERT(list_empty(&sai->sai_agls));
-
- ll_sai_free(sai);
- atomic_dec(&sbi->ll_sa_running);
- }
-}
-
-/* Do NOT forget to drop inode refcount when into sai_agls. */
-static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- __u64 index = lli->lli_agl_index;
- int rc;
-
- LASSERT(list_empty(&lli->lli_agl_list));
-
- /* AGL maybe fall behind statahead with one entry */
- if (is_omitted_entry(sai, index + 1)) {
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- /* Someone is in glimpse (sync or async), do nothing. */
- rc = down_write_trylock(&lli->lli_glimpse_sem);
- if (rc == 0) {
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- /*
- * Someone triggered glimpse within 1 sec before.
- * 1) The former glimpse succeeded with glimpse lock granted by OST, and
- * if the lock is still cached on client, AGL needs to do nothing. If
- * it is cancelled by other client, AGL maybe cannot obtain new lock
- * for no glimpse callback triggered by AGL.
- * 2) The former glimpse succeeded, but OST did not grant glimpse lock.
- * Under such case, it is quite possible that the OST will not grant
- * glimpse lock for AGL also.
- * 3) The former glimpse failed, compared with other two cases, it is
- * relative rare. AGL can ignore such case, and it will not muchly
- * affect the performance.
- */
- if (lli->lli_glimpse_time != 0 &&
- time_before(cfs_time_shift(-1), lli->lli_glimpse_time)) {
- up_write(&lli->lli_glimpse_sem);
- lli->lli_agl_index = 0;
- iput(inode);
- return;
- }
-
- CDEBUG(D_READA, "Handling (init) async glimpse: inode = "
- DFID ", idx = %llu\n", PFID(&lli->lli_fid), index);
-
- cl_agl(inode);
- lli->lli_agl_index = 0;
- lli->lli_glimpse_time = cfs_time_current();
- up_write(&lli->lli_glimpse_sem);
-
- CDEBUG(D_READA, "Handled (init) async glimpse: inode= "
- DFID ", idx = %llu, rc = %d\n",
- PFID(&lli->lli_fid), index, rc);
-
- iput(inode);
-}
-
-/*
- * prepare inode for sa entry, add it into agl list, now sa_entry is ready
- * to be used by scanner process.
- */
-static void sa_instantiate(struct ll_statahead_info *sai,
- struct sa_entry *entry)
-{
- struct inode *dir = sai->sai_dentry->d_inode;
- struct inode *child;
- struct md_enqueue_info *minfo;
- struct lookup_intent *it;
- struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc = 0;
-
- LASSERT(entry->se_handle != 0);
-
- minfo = entry->se_minfo;
- it = &minfo->mi_it;
- req = entry->se_req;
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- rc = -EFAULT;
- goto out;
- }
-
- child = entry->se_inode;
- if (child) {
- /* revalidate; unlinked and re-created with the same name */
- if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) {
- entry->se_inode = NULL;
- iput(child);
- child = NULL;
- }
- }
-
- it->it_lock_handle = entry->se_handle;
- rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
- if (rc != 1) {
- rc = -EAGAIN;
- goto out;
- }
-
- rc = ll_prep_inode(&child, req, dir->i_sb, it);
- if (rc)
- goto out;
-
- CDEBUG(D_READA, "%s: setting %.*s" DFID " l_data to inode %p\n",
- ll_get_fsname(child->i_sb, NULL, 0),
- entry->se_qstr.len, entry->se_qstr.name,
- PFID(ll_inode2fid(child)), child);
- ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
-
- entry->se_inode = child;
-
- if (agl_should_run(sai, child))
- ll_agl_add(sai, child, entry->se_index);
-
-out:
- /*
- * sa_make_ready() will drop ldlm ibits lock refcount by calling
- * ll_intent_drop_lock() in spite of failures. Do not worry about
- * calling ll_intent_drop_lock() more than once.
- */
- sa_make_ready(sai, entry, rc);
-}
-
-/* once there are async stat replies, instantiate sa_entry from replies */
-static void sa_handle_callback(struct ll_statahead_info *sai)
-{
- struct ll_inode_info *lli;
-
- lli = ll_i2info(sai->sai_dentry->d_inode);
-
- while (sa_has_callback(sai)) {
- struct sa_entry *entry;
-
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(!sa_has_callback(sai))) {
- spin_unlock(&lli->lli_sa_lock);
- break;
- }
- entry = list_entry(sai->sai_interim_entries.next,
- struct sa_entry, se_list);
- list_del_init(&entry->se_list);
- spin_unlock(&lli->lli_sa_lock);
-
- sa_instantiate(sai, entry);
- }
-}
-
-/*
- * callback for async stat, because this is called in ptlrpcd context, we only
- * put sa_entry in sai_cb_entries list, and let sa_handle_callback() to really
- * prepare inode and instantiate sa_entry later.
- */
-static int ll_statahead_interpret(struct ptlrpc_request *req,
- struct md_enqueue_info *minfo, int rc)
-{
- struct lookup_intent *it = &minfo->mi_it;
- struct inode *dir = minfo->mi_dir;
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata;
- __u64 handle = 0;
-
- if (it_disposition(it, DISP_LOOKUP_NEG))
- rc = -ENOENT;
-
- /*
- * because statahead thread will wait for all inflight RPC to finish,
- * sai should be always valid, no need to refcount
- */
- LASSERT(sai);
- LASSERT(entry);
-
- CDEBUG(D_READA, "sa_entry %.*s rc %d\n",
- entry->se_qstr.len, entry->se_qstr.name, rc);
-
- if (rc) {
- ll_intent_release(it);
- iput(dir);
- kfree(minfo);
- } else {
- /*
- * release ibits lock ASAP to avoid deadlock when statahead
- * thread enqueues lock on parent in readdir and another
- * process enqueues lock on child with parent lock held, eg.
- * unlink.
- */
- handle = it->it_lock_handle;
- ll_intent_drop_lock(it);
- }
-
- spin_lock(&lli->lli_sa_lock);
- if (rc) {
- if (__sa_make_ready(sai, entry, rc))
- wake_up(&sai->sai_waitq);
- } else {
- int first = 0;
- entry->se_minfo = minfo;
- entry->se_req = ptlrpc_request_addref(req);
- /*
- * Release the async ibits lock ASAP to avoid deadlock
- * when statahead thread tries to enqueue lock on parent
- * for readpage and other tries to enqueue lock on child
- * with parent's lock held, for example: unlink.
- */
- entry->se_handle = handle;
- if (!sa_has_callback(sai))
- first = 1;
-
- list_add_tail(&entry->se_list, &sai->sai_interim_entries);
-
- if (first && sai->sai_task)
- wake_up_process(sai->sai_task);
- }
- sai->sai_replied++;
-
- spin_unlock(&lli->lli_sa_lock);
-
- return rc;
-}
-
-/* finish async stat RPC arguments */
-static void sa_fini_data(struct md_enqueue_info *minfo)
-{
- iput(minfo->mi_dir);
- kfree(minfo);
-}
-
-/**
- * prepare arguments for async stat RPC.
- */
-static struct md_enqueue_info *
-sa_prep_data(struct inode *dir, struct inode *child, struct sa_entry *entry)
-{
- struct md_enqueue_info *minfo;
- struct ldlm_enqueue_info *einfo;
- struct md_op_data *op_data;
-
- minfo = kzalloc(sizeof(*minfo), GFP_NOFS);
- if (!minfo)
- return ERR_PTR(-ENOMEM);
-
- op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- kfree(minfo);
- return (struct md_enqueue_info *)op_data;
- }
-
- if (!child)
- op_data->op_fid2 = entry->se_fid;
-
- minfo->mi_it.it_op = IT_GETATTR;
- minfo->mi_dir = igrab(dir);
- minfo->mi_cb = ll_statahead_interpret;
- minfo->mi_cbdata = entry;
-
- einfo = &minfo->mi_einfo;
- einfo->ei_type = LDLM_IBITS;
- einfo->ei_mode = it_to_lock_mode(&minfo->mi_it);
- einfo->ei_cb_bl = ll_md_blocking_ast;
- einfo->ei_cb_cp = ldlm_completion_ast;
- einfo->ei_cb_gl = NULL;
- einfo->ei_cbdata = NULL;
-
- return minfo;
-}
-
-/* async stat for file not found in dcache */
-static int sa_lookup(struct inode *dir, struct sa_entry *entry)
-{
- struct md_enqueue_info *minfo;
- int rc;
-
- minfo = sa_prep_data(dir, NULL, entry);
- if (IS_ERR(minfo))
- return PTR_ERR(minfo);
-
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
- if (rc)
- sa_fini_data(minfo);
-
- return rc;
-}
-
-/**
- * async stat for file found in dcache, similar to .revalidate
- *
- * \retval 1 dentry valid, no RPC sent
- * \retval 0 dentry invalid, will send async stat RPC
- * \retval negative number upon error
- */
-static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
- struct dentry *dentry)
-{
- struct inode *inode = d_inode(dentry);
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle = 0 };
- struct md_enqueue_info *minfo;
- int rc;
-
- if (unlikely(!inode))
- return 1;
-
- if (d_mountpoint(dentry))
- return 1;
-
- entry->se_inode = igrab(inode);
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),
- NULL);
- if (rc == 1) {
- entry->se_handle = it.it_lock_handle;
- ll_intent_release(&it);
- return 1;
- }
-
- minfo = sa_prep_data(dir, inode, entry);
- if (IS_ERR(minfo)) {
- entry->se_inode = NULL;
- iput(inode);
- return PTR_ERR(minfo);
- }
-
- rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo);
- if (rc) {
- entry->se_inode = NULL;
- iput(inode);
- sa_fini_data(minfo);
- }
-
- return rc;
-}
-
-/* async stat for file with @name */
-static void sa_statahead(struct dentry *parent, const char *name, int len,
- const struct lu_fid *fid)
-{
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct dentry *dentry = NULL;
- struct sa_entry *entry;
- int rc;
-
- entry = sa_alloc(parent, sai, sai->sai_index, name, len, fid);
- if (IS_ERR(entry))
- return;
-
- dentry = d_lookup(parent, &entry->se_qstr);
- if (!dentry) {
- rc = sa_lookup(dir, entry);
- } else {
- rc = sa_revalidate(dir, entry, dentry);
- if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
- ll_agl_add(sai, d_inode(dentry), entry->se_index);
- }
-
- if (dentry)
- dput(dentry);
-
- if (rc)
- sa_make_ready(sai, entry, rc);
- else
- sai->sai_sent++;
-
- sai->sai_index++;
-}
-
-/* async glimpse (agl) thread main function */
-static int ll_agl_thread(void *arg)
-{
- struct dentry *parent = arg;
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *plli = ll_i2info(dir);
- struct ll_inode_info *clli;
- /* We already own this reference, so it is safe to take it without a lock. */
- struct ll_statahead_info *sai = plli->lli_sai;
-
- CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n",
- sai, parent);
-
- while (!kthread_should_stop()) {
-
- spin_lock(&plli->lli_agl_lock);
- /* The statahead thread maybe help to process AGL entries,
- * so check whether list empty again.
- */
- if (!list_empty(&sai->sai_agls)) {
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- ll_agl_trigger(&clli->lli_vfs_inode, sai);
- } else {
- spin_unlock(&plli->lli_agl_lock);
- }
-
- set_current_state(TASK_IDLE);
- if (list_empty(&sai->sai_agls) &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- }
-
- spin_lock(&plli->lli_agl_lock);
- sai->sai_agl_valid = 0;
- while (!list_empty(&sai->sai_agls)) {
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info, lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&plli->lli_agl_lock);
- clli->lli_agl_index = 0;
- iput(&clli->lli_vfs_inode);
- spin_lock(&plli->lli_agl_lock);
- }
- spin_unlock(&plli->lli_agl_lock);
- CDEBUG(D_READA, "agl thread stopped: sai %p, parent %pd\n",
- sai, parent);
- ll_sai_put(sai);
- return 0;
-}
-
-/* start agl thread */
-static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
-{
- struct ll_inode_info *plli;
- struct task_struct *task;
-
- CDEBUG(D_READA, "start agl thread: sai %p, parent %pd\n",
- sai, parent);
-
- plli = ll_i2info(d_inode(parent));
- task = kthread_create(ll_agl_thread, parent, "ll_agl_%u",
- plli->lli_opendir_pid);
- if (IS_ERR(task)) {
- CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task));
- return;
- }
-
- sai->sai_agl_task = task;
- atomic_inc(&ll_i2sbi(d_inode(parent))->ll_agl_total);
- spin_lock(&plli->lli_agl_lock);
- sai->sai_agl_valid = 1;
- spin_unlock(&plli->lli_agl_lock);
- /* Get an extra reference that the thread holds */
- ll_sai_get(d_inode(parent));
-
- wake_up_process(task);
-}
-
-/* statahead thread main function */
-static int ll_statahead_thread(void *arg)
-{
- struct dentry *parent = arg;
- struct inode *dir = d_inode(parent);
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ll_statahead_info *sai = lli->lli_sai;
- struct page *page = NULL;
- __u64 pos = 0;
- int first = 0;
- int rc = 0;
- struct md_op_data *op_data;
-
- CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n",
- sai, parent);
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data)) {
- rc = PTR_ERR(op_data);
- goto out;
- }
-
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
-
- while (pos != MDS_DIR_END_OFF && sai->sai_task) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
-
- sai->sai_in_readpage = 1;
- page = ll_get_dir_page(dir, op_data, pos);
- sai->sai_in_readpage = 0;
- if (IS_ERR(page)) {
- rc = PTR_ERR(page);
- CDEBUG(D_READA, "error reading dir " DFID " at %llu/%llu: opendir_pid = %u: rc = %d\n",
- PFID(ll_inode2fid(dir)), pos, sai->sai_index,
- lli->lli_opendir_pid, rc);
- break;
- }
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp);
- ent && sai->sai_task && !sa_low_hit(sai);
- ent = lu_dirent_next(ent)) {
- struct lu_fid fid;
- __u64 hash;
- int namelen;
- char *name;
-
- hash = le64_to_cpu(ent->lde_hash);
- if (unlikely(hash < pos))
- /*
- * Skip until we find target hash value.
- */
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (unlikely(namelen == 0))
- /*
- * Skip dummy record.
- */
- continue;
-
- name = ent->lde_name;
- if (name[0] == '.') {
- if (namelen == 1) {
- /*
- * skip "."
- */
- continue;
- } else if (name[1] == '.' && namelen == 2) {
- /*
- * skip ".."
- */
- continue;
- } else if (!sai->sai_ls_all) {
- /*
- * skip hidden files.
- */
- sai->sai_skip_hidden++;
- continue;
- }
- }
-
- /*
- * don't stat-ahead first entry.
- */
- if (unlikely(++first == 1))
- continue;
-
- fid_le_to_cpu(&fid, &ent->lde_fid);
-
- do {
- sa_handle_callback(sai);
-
- spin_lock(&lli->lli_agl_lock);
- while (sa_sent_full(sai) &&
- !agl_list_empty(sai)) {
- struct ll_inode_info *clli;
-
- clli = list_entry(sai->sai_agls.next,
- struct ll_inode_info,
- lli_agl_list);
- list_del_init(&clli->lli_agl_list);
- spin_unlock(&lli->lli_agl_lock);
-
- ll_agl_trigger(&clli->lli_vfs_inode,
- sai);
-
- spin_lock(&lli->lli_agl_lock);
- }
- spin_unlock(&lli->lli_agl_lock);
-
- set_current_state(TASK_IDLE);
- if (sa_sent_full(sai) &&
- !sa_has_callback(sai) &&
- agl_list_empty(sai) &&
- sai->sai_task)
- /* wait for spare statahead window */
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (sa_sent_full(sai) && sai->sai_task);
-
- sa_statahead(parent, name, namelen, &fid);
- }
-
- pos = le64_to_cpu(dp->ldp_hash_end);
- ll_release_page(dir, page,
- le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
-
- if (sa_low_hit(sai)) {
- rc = -EFAULT;
- atomic_inc(&sbi->ll_sa_wrong);
- CDEBUG(D_READA, "Statahead for dir " DFID " hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread: pid %d\n",
- PFID(&lli->lli_fid), sai->sai_hit,
- sai->sai_miss, sai->sai_sent,
- sai->sai_replied, current_pid());
- break;
- }
- }
- ll_finish_md_op_data(op_data);
-
- if (rc < 0) {
- spin_lock(&lli->lli_sa_lock);
- sai->sai_task = NULL;
- lli->lli_sa_enabled = 0;
- spin_unlock(&lli->lli_sa_lock);
- }
-
- /*
- * statahead is finished, but statahead entries need to be cached, wait
- * for file release to stop me.
- */
- while (sai->sai_task) {
- sa_handle_callback(sai);
-
- set_current_state(TASK_IDLE);
- if (!sa_has_callback(sai) &&
- sai->sai_task)
- schedule();
- __set_current_state(TASK_RUNNING);
- }
-out:
- if (sai->sai_agl_task) {
- kthread_stop(sai->sai_agl_task);
-
- CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n",
- sai, (unsigned int)sai->sai_agl_task->pid);
- sai->sai_agl_task = NULL;
- }
- /*
- * wait for inflight statahead RPCs to finish, and then we can free sai
- * safely because statahead RPC will access sai data
- */
- while (sai->sai_sent != sai->sai_replied) {
- /* in case we're not woken up, timeout wait */
- schedule_timeout_idle(HZ>>3);
- }
-
- /* release resources held by statahead RPCs */
- sa_handle_callback(sai);
-
- CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n",
- sai, parent);
-
- spin_lock(&lli->lli_sa_lock);
- sai->sai_task = NULL;
- spin_unlock(&lli->lli_sa_lock);
-
- wake_up(&sai->sai_waitq);
- ll_sai_put(sai);
-
- do_exit(rc);
-}
-
-/* authorize opened dir handle @key to statahead */
-void ll_authorize_statahead(struct inode *dir, void *key)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
-
- spin_lock(&lli->lli_sa_lock);
- if (!lli->lli_opendir_key && !lli->lli_sai) {
- /*
- * if lli_sai is not NULL, it means previous statahead is not
- * finished yet, we'd better not start a new statahead for now.
- */
- LASSERT(!lli->lli_opendir_pid);
- lli->lli_opendir_key = key;
- lli->lli_opendir_pid = current_pid();
- lli->lli_sa_enabled = 1;
- }
- spin_unlock(&lli->lli_sa_lock);
-}
-
-/*
- * deauthorize opened dir handle @key to statahead, but statahead thread may
- * still be running, notify it to quit.
- */
-void ll_deauthorize_statahead(struct inode *dir, void *key)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai;
-
- LASSERT(lli->lli_opendir_key == key);
- LASSERT(lli->lli_opendir_pid);
-
- CDEBUG(D_READA, "deauthorize statahead for " DFID "\n",
- PFID(&lli->lli_fid));
-
- spin_lock(&lli->lli_sa_lock);
- lli->lli_opendir_key = NULL;
- lli->lli_opendir_pid = 0;
- lli->lli_sa_enabled = 0;
- sai = lli->lli_sai;
- if (sai && sai->sai_task) {
- /*
- * statahead thread may not quit yet because it needs to cache
- * entries, now it's time to tell it to quit.
- */
- wake_up_process(sai->sai_task);
- sai->sai_task = NULL;
- }
- spin_unlock(&lli->lli_sa_lock);
-}
-
-enum {
- /**
- * not first dirent, or is "."
- */
- LS_NOT_FIRST_DE = 0,
- /**
- * the first non-hidden dirent
- */
- LS_FIRST_DE,
- /**
- * the first hidden dirent, that is "."
- */
- LS_FIRST_DOT_DE
-};
-
-/* file is first dirent under @dir */
-static int is_first_dirent(struct inode *dir, struct dentry *dentry)
-{
- const struct qstr *target = &dentry->d_name;
- struct md_op_data *op_data;
- struct page *page;
- __u64 pos = 0;
- int dot_de;
- int rc = LS_NOT_FIRST_DE;
-
- op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
- LUSTRE_OPC_ANY, dir);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
- /**
- * FIXME choose the start offset of the readdir
- */
- op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
-
- page = ll_get_dir_page(dir, op_data, pos);
-
- while (1) {
- struct lu_dirpage *dp;
- struct lu_dirent *ent;
-
- if (IS_ERR(page)) {
- struct ll_inode_info *lli = ll_i2info(dir);
-
- rc = PTR_ERR(page);
- CERROR("%s: error reading dir " DFID " at %llu: opendir_pid = %u : rc = %d\n",
- ll_get_fsname(dir->i_sb, NULL, 0),
- PFID(ll_inode2fid(dir)), pos,
- lli->lli_opendir_pid, rc);
- break;
- }
-
- dp = page_address(page);
- for (ent = lu_dirent_start(dp); ent;
- ent = lu_dirent_next(ent)) {
- __u64 hash;
- int namelen;
- char *name;
-
- hash = le64_to_cpu(ent->lde_hash);
- /* The ll_get_dir_page() can return any page containing
- * the given hash which may be not the start hash.
- */
- if (unlikely(hash < pos))
- continue;
-
- namelen = le16_to_cpu(ent->lde_namelen);
- if (unlikely(namelen == 0))
- /*
- * skip dummy record.
- */
- continue;
-
- name = ent->lde_name;
- if (name[0] == '.') {
- if (namelen == 1)
- /*
- * skip "."
- */
- continue;
- else if (name[1] == '.' && namelen == 2)
- /*
- * skip ".."
- */
- continue;
- else
- dot_de = 1;
- } else {
- dot_de = 0;
- }
-
- if (dot_de && target->name[0] != '.') {
- CDEBUG(D_READA, "%.*s skip hidden file %.*s\n",
- target->len, target->name,
- namelen, name);
- continue;
- }
-
- if (target->len != namelen ||
- memcmp(target->name, name, namelen) != 0)
- rc = LS_NOT_FIRST_DE;
- else if (!dot_de)
- rc = LS_FIRST_DE;
- else
- rc = LS_FIRST_DOT_DE;
-
- ll_release_page(dir, page, false);
- goto out;
- }
- pos = le64_to_cpu(dp->ldp_hash_end);
- if (pos == MDS_DIR_END_OFF) {
- /*
- * End of directory reached.
- */
- ll_release_page(dir, page, false);
- goto out;
- } else {
- /*
- * chain is exhausted
- * Normal case: continue to the next page.
- */
- ll_release_page(dir, page,
- le32_to_cpu(dp->ldp_flags) &
- LDF_COLLIDE);
- page = ll_get_dir_page(dir, op_data, pos);
- }
- }
-out:
- ll_finish_md_op_data(op_data);
- return rc;
-}
-
-/**
- * revalidate @dentryp from statahead cache
- *
- * \param[in] dir parent directory
- * \param[in] sai sai structure
- * \param[out] dentryp pointer to dentry which will be revalidated
- * \param[in] unplug unplug statahead window only (normally for negative
- * dentry)
- * \retval 1 on success, dentry is saved in @dentryp
- * \retval 0 if revalidation failed (no proper lock on client)
- * \retval negative number upon error
- */
-static int revalidate_statahead_dentry(struct inode *dir,
- struct ll_statahead_info *sai,
- struct dentry **dentryp,
- bool unplug)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct sa_entry *entry = NULL;
- struct ll_dentry_data *ldd;
- int rc = 0;
-
- if ((*dentryp)->d_name.name[0] == '.') {
- if (sai->sai_ls_all ||
- sai->sai_miss_hidden >= sai->sai_skip_hidden) {
- /*
- * Hidden dentry is the first one, or statahead
- * thread does not skip so many hidden dentries
- * before "sai_ls_all" enabled as below.
- */
- } else {
- if (!sai->sai_ls_all)
- /*
- * It maybe because hidden dentry is not
- * the first one, "sai_ls_all" was not
- * set, then "ls -al" missed. Enable
- * "sai_ls_all" for such case.
- */
- sai->sai_ls_all = 1;
-
- /*
- * Such "getattr" has been skipped before
- * "sai_ls_all" enabled as above.
- */
- sai->sai_miss_hidden++;
- return -EAGAIN;
- }
- }
-
- if (unplug) {
- rc = 1;
- goto out_unplug;
- }
-
- entry = sa_get(sai, &(*dentryp)->d_name);
- if (!entry) {
- rc = -EAGAIN;
- goto out_unplug;
- }
-
- /* if statahead is busy in readdir, help it do post-work */
- if (!sa_ready(entry) && sai->sai_in_readpage)
- sa_handle_callback(sai);
-
- if (!sa_ready(entry)) {
- spin_lock(&lli->lli_sa_lock);
- sai->sai_index_wait = entry->se_index;
- spin_unlock(&lli->lli_sa_lock);
- if (0 == wait_event_idle_timeout(sai->sai_waitq,
- sa_ready(entry), 30 * HZ)) {
- /*
- * entry may not be ready, so it may be used by inflight
- * statahead RPC, don't free it.
- */
- entry = NULL;
- rc = -EAGAIN;
- goto out_unplug;
- }
- }
-
- if (entry->se_state == SA_ENTRY_SUCC && entry->se_inode) {
- struct inode *inode = entry->se_inode;
- struct lookup_intent it = { .it_op = IT_GETATTR,
- .it_lock_handle = entry->se_handle };
- __u64 bits;
-
- rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
- ll_inode2fid(inode), &bits);
- if (rc == 1) {
- if (!(*dentryp)->d_inode) {
- struct dentry *alias;
-
- alias = ll_splice_alias(inode, *dentryp);
- if (IS_ERR(alias)) {
- ll_intent_release(&it);
- rc = PTR_ERR(alias);
- goto out_unplug;
- }
- *dentryp = alias;
- /**
- * statahead prepared this inode, transfer inode
- * refcount from sa_entry to dentry
- */
- entry->se_inode = NULL;
- } else if ((*dentryp)->d_inode != inode) {
- /* revalidate, but inode is recreated */
- CDEBUG(D_READA,
- "%s: stale dentry %pd inode " DFID ", statahead inode " DFID "\n",
- ll_get_fsname((*dentryp)->d_inode->i_sb,
- NULL, 0),
- *dentryp,
- PFID(ll_inode2fid((*dentryp)->d_inode)),
- PFID(ll_inode2fid(inode)));
- ll_intent_release(&it);
- rc = -ESTALE;
- goto out_unplug;
- }
-
- if ((bits & MDS_INODELOCK_LOOKUP) &&
- d_lustre_invalid(*dentryp))
- d_lustre_revalidate(*dentryp);
- ll_intent_release(&it);
- }
- }
-out_unplug:
- /*
- * statahead cached sa_entry can be used only once, and will be killed
- * right after use, so if lookup/revalidate accessed statahead cache,
- * set dentry ldd_sa_generation to parent lli_sa_generation, later if we
- * stat this file again, we know we've done statahead before, see
- * dentry_may_statahead().
- */
- ldd = ll_d2d(*dentryp);
- ldd->lld_sa_generation = lli->lli_sa_generation;
- sa_put(sai, entry, lli);
- return rc;
-}
-
-/**
- * start statahead thread
- *
- * \param[in] dir parent directory
- * \param[in] dentry dentry that triggers statahead, normally the first
- * dirent under @dir
- * \retval -EAGAIN on success, because when this function is
- * called, it's already in lookup call, so client should
- * do it itself instead of waiting for statahead thread
- * to do it asynchronously.
- * \retval negative number upon error
- */
-static int start_statahead_thread(struct inode *dir, struct dentry *dentry)
-{
- struct ll_inode_info *lli = ll_i2info(dir);
- struct ll_statahead_info *sai = NULL;
- struct task_struct *task;
- struct dentry *parent = dentry->d_parent;
- int rc;
-
- /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */
- rc = is_first_dirent(dir, dentry);
- if (rc == LS_NOT_FIRST_DE) {
- /* It is not "ls -{a}l" operation, no need statahead for it. */
- rc = -EFAULT;
- goto out;
- }
-
- sai = ll_sai_alloc(parent);
- if (!sai) {
- rc = -ENOMEM;
- goto out;
- }
-
- sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
- /*
- * if current lli_opendir_key was deauthorized, or dir re-opened by
- * another process, don't start statahead, otherwise the newly spawned
- * statahead thread won't be notified to quit.
- */
- spin_lock(&lli->lli_sa_lock);
- if (unlikely(lli->lli_sai || lli->lli_opendir_key ||
- lli->lli_opendir_pid != current->pid)) {
- spin_unlock(&lli->lli_sa_lock);
- rc = -EPERM;
- goto out;
- }
- lli->lli_sai = sai;
- spin_unlock(&lli->lli_sa_lock);
-
- atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_running);
-
- CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n",
- current_pid(), parent);
-
- task = kthread_create(ll_statahead_thread, parent, "ll_sa_%u",
- lli->lli_opendir_pid);
- if (IS_ERR(task)) {
- rc = PTR_ERR(task);
- CERROR("can't start ll_sa thread, rc : %d\n", rc);
- goto out;
- }
-
- if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED)
- ll_start_agl(parent, sai);
-
- atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_total);
- sai->sai_task = task;
-
- wake_up_process(task);
-
- /*
- * We don't stat-ahead for the first dirent since we are already in
- * lookup.
- */
- return -EAGAIN;
-
-out:
- /*
- * once we start statahead thread failed, disable statahead so
- * that subsequent stat won't waste time to try it.
- */
- spin_lock(&lli->lli_sa_lock);
- lli->lli_sa_enabled = 0;
- lli->lli_sai = NULL;
- spin_unlock(&lli->lli_sa_lock);
- if (sai)
- ll_sai_free(sai);
- return rc;
-}
-
-/**
- * statahead entry function, this is called when client getattr on a file, it
- * will start statahead thread if this is the first dir entry, else revalidate
- * dentry from statahead cache.
- *
- * \param[in] dir parent directory
- * \param[out] dentryp dentry to getattr
- * \param[in] unplug unplug statahead window only (normally for negative
- * dentry)
- * \retval 1 on success
- * \retval 0 revalidation from statahead cache failed, caller needs
- * to getattr from server directly
- * \retval negative number on error, caller often ignores this and
- * then getattr from server
- */
-int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug)
-{
- struct ll_statahead_info *sai;
-
- sai = ll_sai_get(dir);
- if (sai) {
- int rc;
-
- rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug);
- CDEBUG(D_READA, "revalidate statahead %pd: %d.\n",
- *dentryp, rc);
- ll_sai_put(sai);
- return rc;
- }
- return start_statahead_thread(dir, *dentryp);
-}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
deleted file mode 100644
index 861e7a60f408..000000000000
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <lustre_ha.h>
-#include <lustre_dlm.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <lprocfs_status.h>
-#include "llite_internal.h"
-
-static struct kmem_cache *ll_inode_cachep;
-
-static struct inode *ll_alloc_inode(struct super_block *sb)
-{
- struct ll_inode_info *lli;
-
- ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_ALLOC_INODE, 1);
- lli = kmem_cache_zalloc(ll_inode_cachep, GFP_NOFS);
- if (!lli)
- return NULL;
-
- inode_init_once(&lli->lli_vfs_inode);
- return &lli->lli_vfs_inode;
-}
-
-static void ll_inode_destroy_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct ll_inode_info *ptr = ll_i2info(inode);
-
- kmem_cache_free(ll_inode_cachep, ptr);
-}
-
-static void ll_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, ll_inode_destroy_callback);
-}
-
-/* exported operations */
-struct super_operations lustre_super_operations = {
- .alloc_inode = ll_alloc_inode,
- .destroy_inode = ll_destroy_inode,
- .evict_inode = ll_delete_inode,
- .put_super = ll_put_super,
- .statfs = ll_statfs,
- .umount_begin = ll_umount_begin,
- .remount_fs = ll_remount_fs,
- .show_options = ll_show_options,
-};
-MODULE_ALIAS_FS("lustre");
-
-static int __init lustre_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) !=
- LUSTRE_VOLATILE_HDR_LEN + 1);
-
- /* print an address of _any_ initialized kernel symbol from this
- * module, to allow debugging with gdb that doesn't support data
- * symbols from modules.
- */
- CDEBUG(D_INFO, "Lustre client module (%p).\n",
- &lustre_super_operations);
-
- rc = -ENOMEM;
- ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
- sizeof(struct ll_inode_info), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
- if (!ll_inode_cachep)
- goto out_cache;
-
- ll_file_data_slab = kmem_cache_create("ll_file_data",
- sizeof(struct ll_file_data), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!ll_file_data_slab)
- goto out_cache;
-
- llite_root = debugfs_create_dir("llite", debugfs_lustre_root);
- if (IS_ERR_OR_NULL(llite_root)) {
- rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM;
- llite_root = NULL;
- goto out_cache;
- }
-
- llite_kset = kset_create_and_add("llite", NULL, lustre_kobj);
- if (!llite_kset) {
- rc = -ENOMEM;
- goto out_debugfs;
- }
-
- rc = vvp_global_init();
- if (rc != 0)
- goto out_sysfs;
-
- cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck,
- LCT_REMEMBER | LCT_NOREF);
- if (IS_ERR(cl_inode_fini_env)) {
- rc = PTR_ERR(cl_inode_fini_env);
- goto out_vvp;
- }
-
- cl_inode_fini_env->le_ctx.lc_cookie = 0x4;
-
- rc = ll_xattr_init();
- if (rc != 0)
- goto out_inode_fini_env;
-
- lustre_register_super_ops(THIS_MODULE, ll_fill_super, ll_kill_super);
- lustre_register_client_process_config(ll_process_config);
-
- return 0;
-
-out_inode_fini_env:
- cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
-out_vvp:
- vvp_global_fini();
-out_sysfs:
- kset_unregister(llite_kset);
-out_debugfs:
- debugfs_remove(llite_root);
-out_cache:
- kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_file_data_slab);
- return rc;
-}
-
-static void __exit lustre_exit(void)
-{
- lustre_register_super_ops(NULL, NULL, NULL);
- lustre_register_client_process_config(NULL);
-
- debugfs_remove(llite_root);
- kset_unregister(llite_kset);
-
- ll_xattr_fini();
- cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck);
- vvp_global_fini();
-
- kmem_cache_destroy(ll_inode_cachep);
- kmem_cache_destroy(ll_file_data_slab);
-}
-
-MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Client File System");
-MODULE_VERSION(LUSTRE_VERSION_STRING);
-MODULE_LICENSE("GPL");
-
-module_init(lustre_init);
-module_exit(lustre_exit);
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
deleted file mode 100644
index 0690fdbf49f5..000000000000
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/stat.h>
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include "llite_internal.h"
-
-static int ll_readlink_internal(struct inode *inode,
- struct ptlrpc_request **request, char **symname)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc, symlen = i_size_read(inode) + 1;
- struct mdt_body *body;
- struct md_op_data *op_data;
-
- *request = NULL;
-
- if (lli->lli_symlink_name) {
- int print_limit = min_t(int, PAGE_SIZE - 128, symlen);
-
- *symname = lli->lli_symlink_name;
- /* If the total CDEBUG() size is larger than a page, it
- * will print a warning to the console, avoid this by
- * printing just the last part of the symlink.
- */
- CDEBUG(D_INODE, "using cached symlink %s%.*s, len = %d\n",
- print_limit < symlen ? "..." : "", print_limit,
- (*symname) + symlen - print_limit, symlen);
- return 0;
- }
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, symlen,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- return PTR_ERR(op_data);
-
- op_data->op_valid = OBD_MD_LINKNAME;
- rc = md_getattr(sbi->ll_md_exp, op_data, request);
- ll_finish_md_op_data(op_data);
- if (rc) {
- if (rc != -ENOENT)
- CERROR("%s: inode " DFID ": rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), rc);
- goto failed;
- }
-
- body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
- if ((body->mbo_valid & OBD_MD_LINKNAME) == 0) {
- CERROR("OBD_MD_LINKNAME not set on reply\n");
- rc = -EPROTO;
- goto failed;
- }
-
- LASSERT(symlen != 0);
- if (body->mbo_eadatasize != symlen) {
- CERROR("%s: inode " DFID ": symlink length %d not expected %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), body->mbo_eadatasize - 1,
- symlen - 1);
- rc = -EPROTO;
- goto failed;
- }
-
- *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
- if (!*symname ||
- strnlen(*symname, symlen) != symlen - 1) {
- /* not full/NULL terminated */
- CERROR("inode %lu: symlink not NULL terminated string of length %d\n",
- inode->i_ino, symlen - 1);
- rc = -EPROTO;
- goto failed;
- }
-
- lli->lli_symlink_name = kzalloc(symlen, GFP_NOFS);
- /* do not return an error if we cannot cache the symlink locally */
- if (lli->lli_symlink_name) {
- memcpy(lli->lli_symlink_name, *symname, symlen);
- *symname = lli->lli_symlink_name;
- }
- return 0;
-
-failed:
- return rc;
-}
-
-static void ll_put_link(void *p)
-{
- ptlrpc_req_finished(p);
-}
-
-static const char *ll_get_link(struct dentry *dentry,
- struct inode *inode,
- struct delayed_call *done)
-{
- struct ptlrpc_request *request = NULL;
- int rc;
- char *symname = NULL;
-
- if (!dentry)
- return ERR_PTR(-ECHILD);
-
- CDEBUG(D_VFSTRACE, "VFS Op\n");
- ll_inode_size_lock(inode);
- rc = ll_readlink_internal(inode, &request, &symname);
- ll_inode_size_unlock(inode);
- if (rc) {
- ptlrpc_req_finished(request);
- return ERR_PTR(rc);
- }
-
- /* symname may contain a pointer to the request message buffer,
- * we delay request releasing then.
- */
- set_delayed_call(done, ll_put_link, request);
- return symname;
-}
-
-const struct inode_operations ll_fast_symlink_inode_operations = {
- .setattr = ll_setattr,
- .get_link = ll_get_link,
- .getattr = ll_getattr,
- .permission = ll_inode_permission,
- .listxattr = ll_listxattr,
-};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
deleted file mode 100644
index 987c03b058e6..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ /dev/null
@@ -1,659 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl_device and cl_device_type implementation for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@intel.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd.h>
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Vvp device and device type functions.
- *
- */
-
-/*
- * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
- * "llite_" (var. "ll_") prefix.
- */
-
-static struct kmem_cache *ll_thread_kmem;
-struct kmem_cache *vvp_lock_kmem;
-struct kmem_cache *vvp_object_kmem;
-static struct kmem_cache *vvp_session_kmem;
-static struct kmem_cache *vvp_thread_kmem;
-
-static struct lu_kmem_descr vvp_caches[] = {
- {
- .ckd_cache = &ll_thread_kmem,
- .ckd_name = "ll_thread_kmem",
- .ckd_size = sizeof(struct ll_thread_info),
- },
- {
- .ckd_cache = &vvp_lock_kmem,
- .ckd_name = "vvp_lock_kmem",
- .ckd_size = sizeof(struct vvp_lock),
- },
- {
- .ckd_cache = &vvp_object_kmem,
- .ckd_name = "vvp_object_kmem",
- .ckd_size = sizeof(struct vvp_object),
- },
- {
- .ckd_cache = &vvp_session_kmem,
- .ckd_name = "vvp_session_kmem",
- .ckd_size = sizeof(struct vvp_session)
- },
- {
- .ckd_cache = &vvp_thread_kmem,
- .ckd_name = "vvp_thread_kmem",
- .ckd_size = sizeof(struct vvp_thread_info),
- },
- {
- .ckd_cache = NULL
- }
-};
-
-static void *ll_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_thread_info *info;
-
- info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
- if (!info)
- info = ERR_PTR(-ENOMEM);
- return info;
-}
-
-static void ll_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_thread_info *info = data;
-
- kmem_cache_free(ll_thread_kmem, info);
-}
-
-struct lu_context_key ll_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = ll_thread_key_init,
- .lct_fini = ll_thread_key_fini
-};
-
-static void *vvp_session_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_session *session;
-
- session = kmem_cache_zalloc(vvp_session_kmem, GFP_NOFS);
- if (!session)
- session = ERR_PTR(-ENOMEM);
- return session;
-}
-
-static void vvp_session_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_session *session = data;
-
- kmem_cache_free(vvp_session_kmem, session);
-}
-
-struct lu_context_key vvp_session_key = {
- .lct_tags = LCT_SESSION,
- .lct_init = vvp_session_key_init,
- .lct_fini = vvp_session_key_fini
-};
-
-static void *vvp_thread_key_init(const struct lu_context *ctx,
- struct lu_context_key *key)
-{
- struct vvp_thread_info *vti;
-
- vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
- if (!vti)
- vti = ERR_PTR(-ENOMEM);
- return vti;
-}
-
-static void vvp_thread_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data)
-{
- struct vvp_thread_info *vti = data;
-
- kmem_cache_free(vvp_thread_kmem, vti);
-}
-
-struct lu_context_key vvp_thread_key = {
- .lct_tags = LCT_CL_THREAD,
- .lct_init = vvp_thread_key_init,
- .lct_fini = vvp_thread_key_fini
-};
-
-/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */
-LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
-
-static const struct lu_device_operations vvp_lu_ops = {
- .ldo_object_alloc = vvp_object_alloc
-};
-
-static struct lu_device *vvp_device_free(const struct lu_env *env,
- struct lu_device *d)
-{
- struct vvp_device *vdv = lu2vvp_dev(d);
- struct cl_site *site = lu2cl_site(d->ld_site);
- struct lu_device *next = cl2lu_dev(vdv->vdv_next);
-
- if (d->ld_site) {
- cl_site_fini(site);
- kfree(site);
- }
- cl_device_fini(lu2cl_dev(d));
- kfree(vdv);
- return next;
-}
-
-static struct lu_device *vvp_device_alloc(const struct lu_env *env,
- struct lu_device_type *t,
- struct lustre_cfg *cfg)
-{
- struct vvp_device *vdv;
- struct lu_device *lud;
- struct cl_site *site;
- int rc;
-
- vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
- if (!vdv)
- return ERR_PTR(-ENOMEM);
-
- lud = &vdv->vdv_cl.cd_lu_dev;
- cl_device_init(&vdv->vdv_cl, t);
- vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
-
- site = kzalloc(sizeof(*site), GFP_NOFS);
- if (site) {
- rc = cl_site_init(site, &vdv->vdv_cl);
- if (rc == 0) {
- rc = lu_site_init_finish(&site->cs_lu);
- } else {
- LASSERT(!lud->ld_site);
- CERROR("Cannot init lu_site, rc %d.\n", rc);
- kfree(site);
- }
- } else {
- rc = -ENOMEM;
- }
- if (rc != 0) {
- vvp_device_free(env, lud);
- lud = ERR_PTR(rc);
- }
- return lud;
-}
-
-static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
- const char *name, struct lu_device *next)
-{
- struct vvp_device *vdv;
- int rc;
-
- vdv = lu2vvp_dev(d);
- vdv->vdv_next = lu2cl_dev(next);
-
- LASSERT(d->ld_site && next->ld_type);
- next->ld_site = d->ld_site;
- rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
- next->ld_type->ldt_name,
- NULL);
- if (rc == 0) {
- lu_device_get(next);
- lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
- }
- return rc;
-}
-
-static struct lu_device *vvp_device_fini(const struct lu_env *env,
- struct lu_device *d)
-{
- return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
-}
-
-static const struct lu_device_type_operations vvp_device_type_ops = {
- .ldto_init = vvp_type_init,
- .ldto_fini = vvp_type_fini,
-
- .ldto_start = vvp_type_start,
- .ldto_stop = vvp_type_stop,
-
- .ldto_device_alloc = vvp_device_alloc,
- .ldto_device_free = vvp_device_free,
- .ldto_device_init = vvp_device_init,
- .ldto_device_fini = vvp_device_fini,
-};
-
-struct lu_device_type vvp_device_type = {
- .ldt_tags = LU_DEVICE_CL,
- .ldt_name = LUSTRE_VVP_NAME,
- .ldt_ops = &vvp_device_type_ops,
- .ldt_ctx_tags = LCT_CL_THREAD
-};
-
-/**
- * A mutex serializing calls to vvp_inode_fini() under extreme memory
- * pressure, when environments cannot be allocated.
- */
-int vvp_global_init(void)
-{
- int rc;
-
- rc = lu_kmem_init(vvp_caches);
- if (rc != 0)
- return rc;
-
- rc = lu_device_type_init(&vvp_device_type);
- if (rc != 0)
- goto out_kmem;
-
- return 0;
-
-out_kmem:
- lu_kmem_fini(vvp_caches);
-
- return rc;
-}
-
-void vvp_global_fini(void)
-{
- lu_device_type_fini(&vvp_device_type);
- lu_kmem_fini(vvp_caches);
-}
-
-/*****************************************************************************
- *
- * mirror obd-devices into cl devices.
- *
- */
-
-int cl_sb_init(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
- struct cl_device *cl;
- struct lu_env *env;
- int rc = 0;
- u16 refcheck;
-
- sbi = ll_s2sbi(sb);
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- cl = cl_type_setup(env, NULL, &vvp_device_type,
- sbi->ll_dt_exp->exp_obd->obd_lu_dev);
- if (!IS_ERR(cl)) {
- sbi->ll_cl = cl;
- sbi->ll_site = cl2lu_dev(cl)->ld_site;
- }
- cl_env_put(env, &refcheck);
- } else {
- rc = PTR_ERR(env);
- }
- return rc;
-}
-
-int cl_sb_fini(struct super_block *sb)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- struct cl_device *cld;
- u16 refcheck;
- int result;
-
- sbi = ll_s2sbi(sb);
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- cld = sbi->ll_cl;
-
- if (cld) {
- cl_stack_fini(env, cld);
- sbi->ll_cl = NULL;
- sbi->ll_site = NULL;
- }
- cl_env_put(env, &refcheck);
- result = 0;
- } else {
- CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
- result = PTR_ERR(env);
- }
- return result;
-}
-
-/****************************************************************************
- *
- * debugfs/lustre/llite/$MNT/dump_page_cache
- *
- ****************************************************************************/
-
-/*
- * To represent contents of a page cache as a byte stream, following
- * information if encoded in 64bit offset:
- *
- * - file hash bucket in lu_site::ls_hash[] 28bits
- *
- * - how far file is from bucket head 4bits
- *
- * - page index 32bits
- *
- * First two data identify a file in the cache uniquely.
- */
-
-#define PGC_OBJ_SHIFT (32 + 4)
-#define PGC_DEPTH_SHIFT (32)
-
-struct vvp_pgcache_id {
- unsigned int vpi_bucket;
- unsigned int vpi_depth;
- u32 vpi_index;
-
- unsigned int vpi_curdep;
- struct lu_object_header *vpi_obj;
-};
-
-static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
-{
- BUILD_BUG_ON(sizeof(pos) != sizeof(__u64));
-
- id->vpi_index = pos & 0xffffffff;
- id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf;
- id->vpi_bucket = (unsigned long long)pos >> PGC_OBJ_SHIFT;
-}
-
-static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id)
-{
- return
- ((__u64)id->vpi_index) |
- ((__u64)id->vpi_depth << PGC_DEPTH_SHIFT) |
- ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT);
-}
-
-static int vvp_pgcache_obj_get(struct cfs_hash *hs, struct cfs_hash_bd *bd,
- struct hlist_node *hnode, void *data)
-{
- struct vvp_pgcache_id *id = data;
- struct lu_object_header *hdr = cfs_hash_object(hs, hnode);
-
- if (id->vpi_curdep-- > 0)
- return 0; /* continue */
-
- if (lu_object_is_dying(hdr))
- return 1;
-
- cfs_hash_get(hs, hnode);
- id->vpi_obj = hdr;
- return 1;
-}
-
-static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
- struct lu_device *dev,
- struct vvp_pgcache_id *id)
-{
- LASSERT(lu_device_is_cl(dev));
-
- id->vpi_depth &= 0xf;
- id->vpi_obj = NULL;
- id->vpi_curdep = id->vpi_depth;
-
- cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket,
- vvp_pgcache_obj_get, id);
- if (id->vpi_obj) {
- struct lu_object *lu_obj;
-
- lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type);
- if (lu_obj) {
- lu_object_ref_add(lu_obj, "dump", current);
- return lu2cl(lu_obj);
- }
- lu_object_put(env, lu_object_top(id->vpi_obj));
-
- } else if (id->vpi_curdep > 0) {
- id->vpi_depth = 0xf;
- }
- return NULL;
-}
-
-static loff_t vvp_pgcache_find(const struct lu_env *env,
- struct lu_device *dev, loff_t pos)
-{
- struct cl_object *clob;
- struct lu_site *site;
- struct vvp_pgcache_id id;
-
- site = dev->ld_site;
- vvp_pgcache_id_unpack(pos, &id);
-
- while (1) {
- if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
- return ~0ULL;
- clob = vvp_pgcache_obj(env, dev, &id);
- if (clob) {
- struct inode *inode = vvp_object_inode(clob);
- struct page *vmpage;
- int nr;
-
- nr = find_get_pages_contig(inode->i_mapping,
- id.vpi_index, 1, &vmpage);
- if (nr > 0) {
- id.vpi_index = vmpage->index;
- /* Cant support over 16T file */
- nr = !(vmpage->index > 0xffffffff);
- put_page(vmpage);
- }
-
- lu_object_ref_del(&clob->co_lu, "dump", current);
- cl_object_put(env, clob);
- if (nr > 0)
- return vvp_pgcache_id_pack(&id);
- }
- /* to the next object. */
- ++id.vpi_depth;
- id.vpi_depth &= 0xf;
- if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
- return ~0ULL;
- id.vpi_index = 0;
- }
-}
-
-#define seq_page_flag(seq, page, flag, has_flags) do { \
- if (test_bit(PG_##flag, &(page)->flags)) { \
- seq_printf(seq, "%s"#flag, has_flags ? "|" : ""); \
- has_flags = 1; \
- } \
-} while (0)
-
-static void vvp_pgcache_page_show(const struct lu_env *env,
- struct seq_file *seq, struct cl_page *page)
-{
- struct vvp_page *vpg;
- struct page *vmpage;
- int has_flags;
-
- vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
- vmpage = vpg->vpg_page;
- seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [",
- 0 /* gen */,
- vpg, page,
- "none",
- vpg->vpg_defer_uptodate ? "du" : "- ",
- PageWriteback(vmpage) ? "wb" : "-",
- vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
- vmpage->mapping->host, vmpage->index,
- page_count(vmpage));
- has_flags = 0;
- seq_page_flag(seq, vmpage, locked, has_flags);
- seq_page_flag(seq, vmpage, error, has_flags);
- seq_page_flag(seq, vmpage, referenced, has_flags);
- seq_page_flag(seq, vmpage, uptodate, has_flags);
- seq_page_flag(seq, vmpage, dirty, has_flags);
- seq_page_flag(seq, vmpage, writeback, has_flags);
- seq_printf(seq, "%s]\n", has_flags ? "" : "-");
-}
-
-static int vvp_pgcache_show(struct seq_file *f, void *v)
-{
- loff_t pos;
- struct ll_sb_info *sbi;
- struct cl_object *clob;
- struct lu_env *env;
- struct vvp_pgcache_id id;
- u16 refcheck;
- int result;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- pos = *(loff_t *)v;
- vvp_pgcache_id_unpack(pos, &id);
- sbi = f->private;
- clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
- if (clob) {
- struct inode *inode = vvp_object_inode(clob);
- struct cl_page *page = NULL;
- struct page *vmpage;
-
- result = find_get_pages_contig(inode->i_mapping,
- id.vpi_index, 1,
- &vmpage);
- if (result > 0) {
- lock_page(vmpage);
- page = cl_vmpage_page(vmpage, clob);
- unlock_page(vmpage);
- put_page(vmpage);
- }
-
- seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
- PFID(lu_object_fid(&clob->co_lu)));
- if (page) {
- vvp_pgcache_page_show(env, f, page);
- cl_page_put(env, page);
- } else {
- seq_puts(f, "missing\n");
- }
- lu_object_ref_del(&clob->co_lu, "dump", current);
- cl_object_put(env, clob);
- } else {
- seq_printf(f, "%llx missing\n", pos);
- }
- cl_env_put(env, &refcheck);
- result = 0;
- } else {
- result = PTR_ERR(env);
- }
- return result;
-}
-
-static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- u16 refcheck;
-
- sbi = f->private;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- sbi = f->private;
- if (sbi->ll_site->ls_obj_hash->hs_cur_bits >
- 64 - PGC_OBJ_SHIFT) {
- pos = ERR_PTR(-EFBIG);
- } else {
- *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev,
- *pos);
- if (*pos == ~0ULL)
- pos = NULL;
- }
- cl_env_put(env, &refcheck);
- }
- return pos;
-}
-
-static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
-{
- struct ll_sb_info *sbi;
- struct lu_env *env;
- u16 refcheck;
-
- env = cl_env_get(&refcheck);
- if (!IS_ERR(env)) {
- sbi = f->private;
- *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev, *pos + 1);
- if (*pos == ~0ULL)
- pos = NULL;
- cl_env_put(env, &refcheck);
- }
- return pos;
-}
-
-static void vvp_pgcache_stop(struct seq_file *f, void *v)
-{
- /* Nothing to do */
-}
-
-static const struct seq_operations vvp_pgcache_ops = {
- .start = vvp_pgcache_start,
- .next = vvp_pgcache_next,
- .stop = vvp_pgcache_stop,
- .show = vvp_pgcache_show
-};
-
-static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
-{
- struct seq_file *seq;
- int rc;
-
- rc = seq_open(filp, &vvp_pgcache_ops);
- if (rc)
- return rc;
-
- seq = filp->private_data;
- seq->private = inode->i_private;
-
- return 0;
-}
-
-const struct file_operations vvp_dump_pgcache_file_ops = {
- .owner = THIS_MODULE,
- .open = vvp_dump_pgcache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
deleted file mode 100644
index 02ea5161d635..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Internal definitions for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#ifndef VVP_INTERNAL_H
-#define VVP_INTERNAL_H
-
-#include <uapi/linux/lustre/lustre_idl.h>
-#include <cl_object.h>
-
-enum obd_notify_event;
-struct inode;
-struct lustre_md;
-struct obd_device;
-struct obd_export;
-struct page;
-
-/**
- * IO state private to IO state private to VVP layer.
- */
-struct vvp_io {
- /** super class */
- struct cl_io_slice vui_cl;
- struct cl_io_lock_link vui_link;
- /**
- * I/O vector information to or from which read/write is going.
- */
- struct iov_iter *vui_iter;
- /**
- * Total size for the left IO.
- */
- size_t vui_tot_count;
-
- union {
- struct vvp_fault_io {
- /**
- * Inode modification time that is checked across DLM
- * lock request.
- */
- time64_t ft_mtime;
- struct vm_area_struct *ft_vma;
- /**
- * locked page returned from vvp_io
- */
- struct page *ft_vmpage;
- /**
- * kernel fault info
- */
- struct vm_fault *ft_vmf;
- /**
- * fault API used bitflags for return code.
- */
- unsigned int ft_flags;
- /**
- * check that flags are from filemap_fault
- */
- bool ft_flags_valid;
- } fault;
- struct {
- struct cl_page_list vui_queue;
- unsigned long vui_written;
- int vui_from;
- int vui_to;
- } write;
- } u;
-
- /**
- * Layout version when this IO is initialized
- */
- __u32 vui_layout_gen;
- /**
- * File descriptor against which IO is done.
- */
- struct ll_file_data *vui_fd;
- struct kiocb *vui_iocb;
-
- /* Readahead state. */
- pgoff_t vui_ra_start;
- pgoff_t vui_ra_count;
- /* Set when vui_ra_{start,count} have been initialized. */
- bool vui_ra_valid;
-};
-
-extern struct lu_device_type vvp_device_type;
-
-extern struct lu_context_key vvp_session_key;
-extern struct lu_context_key vvp_thread_key;
-
-extern struct kmem_cache *vvp_lock_kmem;
-extern struct kmem_cache *vvp_object_kmem;
-
-struct vvp_thread_info {
- struct cl_lock vti_lock;
- struct cl_lock_descr vti_descr;
- struct cl_io vti_io;
- struct cl_attr vti_attr;
-};
-
-static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
-{
- struct vvp_thread_info *vti;
-
- vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key);
- LASSERT(vti);
-
- return vti;
-}
-
-static inline struct cl_lock *vvp_env_lock(const struct lu_env *env)
-{
- struct cl_lock *lock = &vvp_env_info(env)->vti_lock;
-
- memset(lock, 0, sizeof(*lock));
- return lock;
-}
-
-static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env)
-{
- struct cl_attr *attr = &vvp_env_info(env)->vti_attr;
-
- memset(attr, 0, sizeof(*attr));
-
- return attr;
-}
-
-static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env)
-{
- struct cl_io *io = &vvp_env_info(env)->vti_io;
-
- memset(io, 0, sizeof(*io));
-
- return io;
-}
-
-struct vvp_session {
- struct vvp_io cs_ios;
-};
-
-static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
-{
- struct vvp_session *ses;
-
- ses = lu_context_key_get(env->le_ses, &vvp_session_key);
- LASSERT(ses);
-
- return ses;
-}
-
-static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
-{
- return &vvp_env_session(env)->cs_ios;
-}
-
-/**
- * ccc-private object state.
- */
-struct vvp_object {
- struct cl_object_header vob_header;
- struct cl_object vob_cl;
- struct inode *vob_inode;
-
- /**
- * Number of transient pages. This is no longer protected by i_sem,
- * and needs to be atomic. This is not actually used for anything,
- * and can probably be removed.
- */
- atomic_t vob_transient_pages;
-
- /**
- * Number of outstanding mmaps on this file.
- *
- * \see ll_vm_open(), ll_vm_close().
- */
- atomic_t vob_mmap_cnt;
-
- /**
- * various flags
- * vob_discard_page_warned
- * if pages belonging to this object are discarded when a client
- * is evicted, some debug info will be printed, this flag will be set
- * during processing the first discarded page, then avoid flooding
- * debug message for lots of discarded pages.
- *
- * \see ll_dirty_page_discard_warn.
- */
- unsigned int vob_discard_page_warned:1;
-};
-
-/**
- * VVP-private page state.
- */
-struct vvp_page {
- struct cl_page_slice vpg_cl;
- unsigned int vpg_defer_uptodate:1,
- vpg_ra_used:1;
- /** VM page */
- struct page *vpg_page;
-};
-
-static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice)
-{
- return container_of(slice, struct vvp_page, vpg_cl);
-}
-
-static inline pgoff_t vvp_index(struct vvp_page *vvp)
-{
- return vvp->vpg_cl.cpl_index;
-}
-
-struct vvp_device {
- struct cl_device vdv_cl;
- struct cl_device *vdv_next;
-};
-
-struct vvp_lock {
- struct cl_lock_slice vlk_cl;
-};
-
-void *ccc_key_init(const struct lu_context *ctx,
- struct lu_context_key *key);
-void ccc_key_fini(const struct lu_context *ctx,
- struct lu_context_key *key, void *data);
-
-void ccc_umount(const struct lu_env *env, struct cl_device *dev);
-
-static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv)
-{
- return &vdv->vdv_cl.cd_lu_dev;
-}
-
-static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d)
-{
- return container_of0(d, struct vvp_device, vdv_cl.cd_lu_dev);
-}
-
-static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d)
-{
- return container_of0(d, struct vvp_device, vdv_cl);
-}
-
-static inline struct vvp_object *cl2vvp(const struct cl_object *obj)
-{
- return container_of0(obj, struct vvp_object, vob_cl);
-}
-
-static inline struct vvp_object *lu2vvp(const struct lu_object *obj)
-{
- return container_of0(obj, struct vvp_object, vob_cl.co_lu);
-}
-
-static inline struct inode *vvp_object_inode(const struct cl_object *obj)
-{
- return cl2vvp(obj)->vob_inode;
-}
-
-int vvp_object_invariant(const struct cl_object *obj);
-struct vvp_object *cl_inode2vvp(struct inode *inode);
-
-static inline struct page *cl2vm_page(const struct cl_page_slice *slice)
-{
- return cl2vvp_page(slice)->vpg_page;
-}
-
-static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice)
-{
- return container_of(slice, struct vvp_lock, vlk_cl);
-}
-
-# define CLOBINVRNT(env, clob, expr) \
- ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr)))
-
-int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io);
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
-int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *io);
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index);
-struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *dev);
-
-int vvp_global_init(void);
-void vvp_global_fini(void);
-
-extern const struct file_operations vvp_dump_pgcache_file_ops;
-
-#endif /* VVP_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
deleted file mode 100644
index e7a4778e02e4..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ /dev/null
@@ -1,1374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_io for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice)
-{
- struct vvp_io *vio;
-
- vio = container_of(slice, struct vvp_io, vui_cl);
- LASSERT(vio == vvp_env_io(env));
-
- return vio;
-}
-
-/**
- * For swapping layout. The file's layout may have changed.
- * To avoid populating pages to a wrong stripe, we have to verify the
- * correctness of layout. It works because swapping layout processes
- * have to acquire group lock.
- */
-static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
- struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct vvp_io *vio = vvp_env_io(env);
- bool rc = true;
-
- switch (io->ci_type) {
- case CIT_READ:
- case CIT_WRITE:
- /* don't need lock here to check lli_layout_gen as we have held
- * extent lock and GROUP lock has to hold to swap layout
- */
- if (ll_layout_version_get(lli) != vio->vui_layout_gen ||
- OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_LOST_LAYOUT, 0)) {
- io->ci_need_restart = 1;
- /* this will cause a short read/write */
- io->ci_continue = 0;
- rc = false;
- }
- case CIT_FAULT:
- /* fault is okay because we've already had a page. */
- default:
- break;
- }
-
- return rc;
-}
-
-static void vvp_object_size_lock(struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- ll_inode_size_lock(inode);
- cl_object_attr_lock(obj);
-}
-
-static void vvp_object_size_unlock(struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- cl_object_attr_unlock(obj);
- ll_inode_size_unlock(inode);
-}
-
-/**
- * Helper function that if necessary adjusts file size (inode->i_size), when
- * position at the offset \a pos is accessed. File size can be arbitrary stale
- * on a Lustre client, but client at least knows KMS. If accessed area is
- * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
- *
- * Locking: cl_isize_lock is used to serialize changes to inode size and to
- * protect consistency between inode size and cl_object
- * attributes. cl_object_size_lock() protects consistency between cl_attr's of
- * top-object and sub-objects.
- */
-static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, loff_t start, size_t count,
- int *exceed)
-{
- struct cl_attr *attr = vvp_env_thread_attr(env);
- struct inode *inode = vvp_object_inode(obj);
- loff_t pos = start + count - 1;
- loff_t kms;
- int result;
-
- /*
- * Consistency guarantees: following possibilities exist for the
- * relation between region being accessed and real file size at this
- * moment:
- *
- * (A): the region is completely inside of the file;
- *
- * (B-x): x bytes of region are inside of the file, the rest is
- * outside;
- *
- * (C): the region is completely outside of the file.
- *
- * This classification is stable under DLM lock already acquired by
- * the caller, because to change the class, other client has to take
- * DLM lock conflicting with our lock. Also, any updates to ->i_size
- * by other threads on this client are serialized by
- * ll_inode_size_lock(). This guarantees that short reads are handled
- * correctly in the face of concurrent writes and truncates.
- */
- vvp_object_size_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- if (result == 0) {
- kms = attr->cat_kms;
- if (pos > kms) {
- /*
- * A glimpse is necessary to determine whether we
- * return a short read (B) or some zeroes at the end
- * of the buffer (C)
- */
- vvp_object_size_unlock(obj);
- result = cl_glimpse_lock(env, io, inode, obj, 0);
- if (result == 0 && exceed) {
- /* If objective page index exceed end-of-file
- * page index, return directly. Do not expect
- * kernel will check such case correctly.
- * linux-2.6.18-128.1.1 miss to do that.
- * --bug 17336
- */
- loff_t size = i_size_read(inode);
- loff_t cur_index = start >> PAGE_SHIFT;
- loff_t size_index = (size - 1) >> PAGE_SHIFT;
-
- if ((size == 0 && cur_index != 0) ||
- size_index < cur_index)
- *exceed = 1;
- }
- return result;
- }
- /*
- * region is within kms and, hence, within real file
- * size (A). We need to increase i_size to cover the
- * read region so that generic_file_read() will do its
- * job, but that doesn't mean the kms size is
- * _correct_, it is only the _minimum_ size. If
- * someone does a stat they will get the correct size
- * which will always be >= the kms value here.
- * b=11081
- */
- if (i_size_read(inode) < kms) {
- i_size_write(inode, kms);
- CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
- PFID(lu_object_fid(&obj->co_lu)),
- (__u64)i_size_read(inode));
- }
- }
-
- vvp_object_size_unlock(obj);
-
- return result;
-}
-
-/*****************************************************************************
- *
- * io operations.
- *
- */
-
-static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- pgoff_t start, pgoff_t end)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_lock_descr *descr = &vio->vui_link.cill_descr;
- struct cl_object *obj = io->ci_obj;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
-
- memset(&vio->vui_link, 0, sizeof(vio->vui_link));
-
- if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- descr->cld_mode = CLM_GROUP;
- descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid;
- enqflags |= CEF_LOCK_MATCH;
- } else {
- descr->cld_mode = mode;
- }
- descr->cld_obj = obj;
- descr->cld_start = start;
- descr->cld_end = end;
- descr->cld_enq_flags = enqflags;
-
- cl_io_lock_add(env, io, &vio->vui_link);
- return 0;
-}
-
-static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
- __u32 enqflags, enum cl_lock_mode mode,
- loff_t start, loff_t end)
-{
- struct cl_object *obj = io->ci_obj;
-
- return vvp_io_one_lock_index(env, io, enqflags, mode,
- cl_index(obj, start), cl_index(obj, end));
-}
-
-static int vvp_io_write_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- cl_page_list_init(&vio->u.write.vui_queue);
- vio->u.write.vui_written = 0;
- vio->u.write.vui_from = 0;
- vio->u.write.vui_to = PAGE_SIZE;
-
- return 0;
-}
-
-static void vvp_io_write_iter_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- LASSERT(vio->u.write.vui_queue.pl_nr == 0);
-}
-
-static int vvp_io_fault_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = vvp_object_inode(ios->cis_obj);
-
- LASSERT(inode == file_inode(vio->vui_fd->fd_file));
- vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
- return 0;
-}
-
-static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct inode *inode = vvp_object_inode(obj);
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, DFID
- " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
- PFID(lu_object_fid(&obj->co_lu)),
- io->ci_ignore_layout, io->ci_verify_layout,
- vio->vui_layout_gen, io->ci_restore_needed);
-
- if (io->ci_restore_needed) {
- int rc;
-
- /* file was detected release, we need to restore it
- * before finishing the io
- */
- rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
- /* if restore registration failed, no restart,
- * we will return -ENODATA
- */
- /* The layout will change after restore, so we need to
- * block on layout lock hold by the MDT
- * as MDT will not send new layout in lvb (see LU-3124)
- * we have to explicitly fetch it, all this will be done
- * by ll_layout_refresh()
- */
- if (rc == 0) {
- io->ci_restore_needed = 0;
- io->ci_need_restart = 1;
- io->ci_verify_layout = 1;
- } else {
- io->ci_restore_needed = 1;
- io->ci_need_restart = 0;
- io->ci_verify_layout = 0;
- io->ci_result = rc;
- }
- }
-
- if (!io->ci_ignore_layout && io->ci_verify_layout) {
- __u32 gen = 0;
-
- /* check layout version */
- ll_layout_refresh(inode, &gen);
- io->ci_need_restart = vio->vui_layout_gen != gen;
- if (io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- DFID " layout changed from %d to %d.\n",
- PFID(lu_object_fid(&obj->co_lu)),
- vio->vui_layout_gen, gen);
- /* today successful restore is the only possible case */
- /* restore was done, clear restoring state */
- clear_bit(LLIF_FILE_RESTORING,
- &ll_i2info(inode)->lli_flags);
- }
- }
-}
-
-static void vvp_io_fault_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_page *page = io->u.ci_fault.ft_page;
-
- CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
-
- if (page) {
- lu_ref_del(&page->cp_reference, "fault", io);
- cl_page_put(env, page);
- io->u.ci_fault.ft_page = NULL;
- }
- vvp_io_fini(env, ios);
-}
-
-static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
-{
- /*
- * we only want to hold PW locks if the mmap() can generate
- * writes back to the file and that only happens in shared
- * writable vmas
- */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return CLM_WRITE;
- return CLM_READ;
-}
-
-static int vvp_mmap_locks(const struct lu_env *env,
- struct vvp_io *vio, struct cl_io *io)
-{
- struct vvp_thread_info *cti = vvp_env_info(env);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- struct cl_lock_descr *descr = &cti->vti_descr;
- union ldlm_policy_data policy;
- unsigned long addr;
- ssize_t count;
- int result = 0;
- struct iov_iter i;
- struct iovec iov;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- if (!vio->vui_iter) /* nfs or loop back device write */
- return 0;
-
- /* No MM (e.g. NFS)? No vmas too. */
- if (!mm)
- return 0;
-
- iov_for_each(iov, i, *vio->vui_iter) {
- addr = (unsigned long)iov.iov_base;
- count = iov.iov_len;
- if (count == 0)
- continue;
-
- count += addr & (~PAGE_MASK);
- addr &= PAGE_MASK;
-
- down_read(&mm->mmap_sem);
- while ((vma = our_vma(mm, addr, count)) != NULL) {
- struct inode *inode = file_inode(vma->vm_file);
- int flags = CEF_MUST;
-
- if (ll_file_nolock(vma->vm_file)) {
- /*
- * For no lock case is not allowed for mmap
- */
- result = -EINVAL;
- break;
- }
-
- /*
- * XXX: Required lock mode can be weakened: CIT_WRITE
- * io only ever reads user level buffer, and CIT_READ
- * only writes on it.
- */
- policy_from_vma(&policy, vma, addr, count);
- descr->cld_mode = vvp_mode_from_vma(vma);
- descr->cld_obj = ll_i2info(inode)->lli_clob;
- descr->cld_start = cl_index(descr->cld_obj,
- policy.l_extent.start);
- descr->cld_end = cl_index(descr->cld_obj,
- policy.l_extent.end);
- descr->cld_enq_flags = flags;
- result = cl_io_lock_alloc_add(env, io, descr);
-
- CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
- descr->cld_mode, descr->cld_start,
- descr->cld_end);
-
- if (result < 0)
- break;
-
- if (vma->vm_end - addr >= count)
- break;
-
- count -= vma->vm_end - addr;
- addr = vma->vm_end;
- }
- up_read(&mm->mmap_sem);
- if (result < 0)
- break;
- }
- return result;
-}
-
-static void vvp_io_advance(const struct lu_env *env,
- const struct cl_io_slice *ios,
- size_t nob)
-{
- struct cl_object *obj = ios->cis_io->ci_obj;
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vio->vui_tot_count -= nob;
- iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count);
-}
-
-static void vvp_io_update_iov(const struct lu_env *env,
- struct vvp_io *vio, struct cl_io *io)
-{
- size_t size = io->u.ci_rw.crw_count;
-
- if (!vio->vui_iter)
- return;
-
- iov_iter_truncate(vio->vui_iter, size);
-}
-
-static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
- enum cl_lock_mode mode, loff_t start, loff_t end)
-{
- struct vvp_io *vio = vvp_env_io(env);
- int result;
- int ast_flags = 0;
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
- vvp_io_update_iov(env, vio, io);
-
- if (io->u.ci_rw.crw_nonblock)
- ast_flags |= CEF_NONBLOCK;
- result = vvp_mmap_locks(env, vio, io);
- if (result == 0)
- result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
- return result;
-}
-
-static int vvp_io_read_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_io_rw_common *rd = &io->u.ci_rd.rd;
- int result;
-
- result = vvp_io_rw_lock(env, io, CLM_READ, rd->crw_pos,
- rd->crw_pos + rd->crw_count - 1);
-
- return result;
-}
-
-static int vvp_io_fault_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct vvp_io *vio = cl2vvp_io(env, ios);
- /*
- * XXX LDLM_FL_CBPENDING
- */
- return vvp_io_one_lock_index(env,
- io, 0,
- vvp_mode_from_vma(vio->u.fault.ft_vma),
- io->u.ci_fault.ft_index,
- io->u.ci_fault.ft_index);
-}
-
-static int vvp_io_write_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- loff_t start;
- loff_t end;
-
- if (io->u.ci_wr.wr_append) {
- start = 0;
- end = OBD_OBJECT_EOF;
- } else {
- start = io->u.ci_wr.wr.crw_pos;
- end = start + io->u.ci_wr.wr.crw_count - 1;
- }
- return vvp_io_rw_lock(env, io, CLM_WRITE, start, end);
-}
-
-static int vvp_io_setattr_iter_init(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- return 0;
-}
-
-/**
- * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io.
- *
- * Handles "lockless io" mode when extent locking is done by server.
- */
-static int vvp_io_setattr_lock(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- __u64 new_size;
- __u32 enqflags = 0;
-
- if (cl_io_is_trunc(io)) {
- new_size = io->u.ci_setattr.sa_attr.lvb_size;
- if (new_size == 0)
- enqflags = CEF_DISCARD_DATA;
- } else {
- unsigned int valid = io->u.ci_setattr.sa_valid;
-
- if (!(valid & TIMES_SET_FLAGS))
- return 0;
-
- if ((!(valid & ATTR_MTIME) ||
- io->u.ci_setattr.sa_attr.lvb_mtime >=
- io->u.ci_setattr.sa_attr.lvb_ctime) &&
- (!(valid & ATTR_ATIME) ||
- io->u.ci_setattr.sa_attr.lvb_atime >=
- io->u.ci_setattr.sa_attr.lvb_ctime))
- return 0;
- new_size = 0;
- }
-
- return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
- new_size, OBD_OBJECT_EOF);
-}
-
-static int vvp_do_vmtruncate(struct inode *inode, size_t size)
-{
- int result;
- /*
- * Only ll_inode_size_lock is taken at this level.
- */
- ll_inode_size_lock(inode);
- result = inode_newsize_ok(inode, size);
- if (result < 0) {
- ll_inode_size_unlock(inode);
- return result;
- }
- truncate_setsize(inode, size);
- ll_inode_size_unlock(inode);
- return result;
-}
-
-static int vvp_io_setattr_time(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct cl_attr *attr = vvp_env_thread_attr(env);
- int result;
- unsigned valid = CAT_CTIME;
-
- cl_object_attr_lock(obj);
- attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime;
- if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) {
- attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime;
- valid |= CAT_ATIME;
- }
- if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) {
- attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
- valid |= CAT_MTIME;
- }
- result = cl_object_attr_update(env, obj, attr, valid);
- cl_object_attr_unlock(obj);
-
- return result;
-}
-
-static int vvp_io_setattr_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct inode *inode = vvp_object_inode(io->ci_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (cl_io_is_trunc(io)) {
- down_write(&lli->lli_trunc_sem);
- inode_lock(inode);
- inode_dio_wait(inode);
- } else {
- inode_lock(inode);
- }
-
- if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS)
- return vvp_io_setattr_time(env, ios);
-
- return 0;
-}
-
-static void vvp_io_setattr_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct cl_io *io = ios->cis_io;
- struct inode *inode = vvp_object_inode(io->ci_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- if (cl_io_is_trunc(io)) {
- /* Truncate in memory pages - they must be clean pages
- * because osc has already notified to destroy osc_extents.
- */
- vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
- inode_unlock(inode);
- up_write(&lli->lli_trunc_sem);
- } else {
- inode_unlock(inode);
- }
-}
-
-static void vvp_io_setattr_fini(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- bool restore_needed = ios->cis_io->ci_restore_needed;
- struct inode *inode = vvp_object_inode(ios->cis_obj);
-
- vvp_io_fini(env, ios);
-
- if (restore_needed && !ios->cis_io->ci_restore_needed) {
- /* restore finished, set data modified flag for HSM */
- set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
- }
-}
-
-static int vvp_io_read_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct file *file = vio->vui_fd->fd_file;
-
- int result;
- loff_t pos = io->u.ci_rd.rd.crw_pos;
- long cnt = io->u.ci_rd.rd.crw_count;
- long tot = vio->vui_tot_count;
- int exceed = 0;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
-
- down_read(&lli->lli_trunc_sem);
-
- if (!can_populate_pages(env, io, inode))
- return 0;
-
- result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
- if (result != 0)
- return result;
- if (exceed != 0)
- goto out;
-
- LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
- "Read ino %lu, %lu bytes, offset %lld, size %llu\n",
- inode->i_ino, cnt, pos, i_size_read(inode));
-
- /* turn off the kernel's read-ahead */
- vio->vui_fd->fd_file->f_ra.ra_pages = 0;
-
- /* initialize read-ahead window once per syscall */
- if (!vio->vui_ra_valid) {
- vio->vui_ra_valid = true;
- vio->vui_ra_start = cl_index(obj, pos);
- vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
- ll_ras_enter(file);
- }
-
- /* BUG: 5972 */
- file_accessed(file);
- LASSERT(vio->vui_iocb->ki_pos == pos);
- result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
-
-out:
- if (result >= 0) {
- if (result < cnt)
- io->ci_continue = 0;
- io->ci_nob += result;
- ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- vio->vui_fd, pos, result, READ);
- result = 0;
- }
- return result;
-}
-
-static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *plist, int from, int to)
-{
- struct cl_2queue *queue = &io->ci_queue;
- struct cl_page *page;
- unsigned int bytes = 0;
- int rc = 0;
-
- if (plist->pl_nr == 0)
- return 0;
-
- if (from > 0 || to != PAGE_SIZE) {
- page = cl_page_list_first(plist);
- if (plist->pl_nr == 1) {
- cl_page_clip(env, page, from, to);
- } else {
- if (from > 0)
- cl_page_clip(env, page, from, PAGE_SIZE);
- if (to != PAGE_SIZE) {
- page = cl_page_list_last(plist);
- cl_page_clip(env, page, 0, to);
- }
- }
- }
-
- cl_2queue_init(queue);
- cl_page_list_splice(plist, &queue->c2_qin);
- rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
-
- /* plist is not sorted any more */
- cl_page_list_splice(&queue->c2_qin, plist);
- cl_page_list_splice(&queue->c2_qout, plist);
- cl_2queue_fini(env, queue);
-
- if (rc == 0) {
- /* calculate bytes */
- bytes = plist->pl_nr << PAGE_SHIFT;
- bytes -= from + PAGE_SIZE - to;
-
- while (plist->pl_nr > 0) {
- page = cl_page_list_first(plist);
- cl_page_list_del(env, plist, page);
-
- cl_page_clip(env, page, 0, PAGE_SIZE);
-
- SetPageUptodate(cl_page_vmpage(page));
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- }
-
- return bytes > 0 ? bytes : rc;
-}
-
-static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- struct page *vmpage = page->cp_vmpage;
-
- SetPageUptodate(vmpage);
- set_page_dirty(vmpage);
-
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
- cl_page_put(env, page);
-}
-
-/* make sure the page list is contiguous */
-static bool page_list_sanity_check(struct cl_object *obj,
- struct cl_page_list *plist)
-{
- struct cl_page *page;
- pgoff_t index = CL_PAGE_EOF;
-
- cl_page_list_for_each(page, plist) {
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
-
- if (index == CL_PAGE_EOF) {
- index = vvp_index(vpg);
- continue;
- }
-
- ++index;
- if (index == vvp_index(vpg))
- continue;
-
- return false;
- }
- return true;
-}
-
-/* Return how many bytes have queued or written */
-int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
-{
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct vvp_io *vio = vvp_env_io(env);
- struct cl_page_list *queue = &vio->u.write.vui_queue;
- struct cl_page *page;
- int rc = 0;
- int bytes = 0;
- unsigned int npages = vio->u.write.vui_queue.pl_nr;
-
- if (npages == 0)
- return 0;
-
- CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
- npages, vio->u.write.vui_from, vio->u.write.vui_to);
-
- LASSERT(page_list_sanity_check(obj, queue));
-
- /* submit IO with async write */
- rc = cl_io_commit_async(env, io, queue,
- vio->u.write.vui_from, vio->u.write.vui_to,
- write_commit_callback);
- npages -= queue->pl_nr; /* already committed pages */
- if (npages > 0) {
- /* calculate how many bytes were written */
- bytes = npages << PAGE_SHIFT;
-
- /* first page */
- bytes -= vio->u.write.vui_from;
- if (queue->pl_nr == 0) /* last page */
- bytes -= PAGE_SIZE - vio->u.write.vui_to;
- LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
-
- vio->u.write.vui_written += bytes;
-
- CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
- npages, bytes, vio->u.write.vui_written);
-
- /* the first page must have been written. */
- vio->u.write.vui_from = 0;
- }
- LASSERT(page_list_sanity_check(obj, queue));
- LASSERT(ergo(rc == 0, queue->pl_nr == 0));
-
- /* out of quota, try sync write */
- if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
- rc = vvp_io_commit_sync(env, io, queue,
- vio->u.write.vui_from,
- vio->u.write.vui_to);
- if (rc > 0) {
- vio->u.write.vui_written += rc;
- rc = 0;
- }
- }
-
- /* update inode size */
- ll_merge_attr(env, inode);
-
- /* Now the pages in queue were failed to commit, discard them
- * unless they were dirtied before.
- */
- while (queue->pl_nr > 0) {
- page = cl_page_list_first(queue);
- cl_page_list_del(env, queue, page);
-
- if (!PageDirty(cl_page_vmpage(page)))
- cl_page_discard(env, io, page);
-
- cl_page_disown(env, io, page);
-
- /* held in ll_cl_init() */
- lu_ref_del(&page->cp_reference, "cl_io", io);
- cl_page_put(env, page);
- }
- cl_page_list_fini(env, queue);
-
- return rc;
-}
-
-static int vvp_io_write_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- ssize_t result = 0;
- loff_t pos = io->u.ci_wr.wr.crw_pos;
- size_t cnt = io->u.ci_wr.wr.crw_count;
-
- down_read(&lli->lli_trunc_sem);
-
- if (!can_populate_pages(env, io, inode))
- return 0;
-
- if (cl_io_is_append(io)) {
- /*
- * PARALLEL IO This has to be changed for parallel IO doing
- * out-of-order writes.
- */
- ll_merge_attr(env, inode);
- pos = i_size_read(inode);
- io->u.ci_wr.wr.crw_pos = pos;
- vio->vui_iocb->ki_pos = pos;
- } else {
- LASSERT(vio->vui_iocb->ki_pos == pos);
- }
-
- CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
-
- /*
- * The maximum Lustre file size is variable, based on the OST maximum
- * object size and number of stripes. This needs another check in
- * addition to the VFS checks earlier.
- */
- if (pos + cnt > ll_file_maxbytes(inode)) {
- CDEBUG(D_INODE,
- "%s: file " DFID " offset %llu > maxbytes %llu\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(ll_inode2fid(inode)), pos + cnt,
- ll_file_maxbytes(inode));
- return -EFBIG;
- }
-
- if (!vio->vui_iter) {
- /* from a temp io in ll_cl_init(). */
- result = 0;
- } else {
- /*
- * When using the locked AIO function (generic_file_aio_write())
- * testing has shown the inode mutex to be a limiting factor
- * with multi-threaded single shared file performance. To get
- * around this, we now use the lockless version. To maintain
- * consistency, proper locking to protect against writes,
- * trucates, etc. is handled in the higher layers of lustre.
- */
- bool lock_node = !IS_NOSEC(inode);
-
- if (lock_node)
- inode_lock(inode);
- result = __generic_file_write_iter(vio->vui_iocb,
- vio->vui_iter);
- if (lock_node)
- inode_unlock(inode);
-
- if (result > 0 || result == -EIOCBQUEUED)
- result = generic_write_sync(vio->vui_iocb, result);
- }
-
- if (result > 0) {
- result = vvp_io_write_commit(env, io);
- if (vio->u.write.vui_written > 0) {
- result = vio->u.write.vui_written;
- io->ci_nob += result;
-
- CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
- io->ci_nob, result);
- }
- }
- if (result > 0) {
- set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags);
-
- if (result < cnt)
- io->ci_continue = 0;
- ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
- vio->vui_fd, pos, result, WRITE);
- result = 0;
- }
- return result;
-}
-
-static void vvp_io_rw_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct inode *inode = vvp_object_inode(ios->cis_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- up_read(&lli->lli_trunc_sem);
-}
-
-static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
-{
- struct vm_fault *vmf = cfio->ft_vmf;
-
- cfio->ft_flags = filemap_fault(vmf);
- cfio->ft_flags_valid = 1;
-
- if (vmf->page) {
- CDEBUG(D_PAGE,
- "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
- vmf->page, vmf->page->mapping, vmf->page->index,
- (long)vmf->page->flags, page_count(vmf->page),
- page_private(vmf->page), (void *)vmf->address);
- if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
- lock_page(vmf->page);
- cfio->ft_flags |= VM_FAULT_LOCKED;
- }
-
- cfio->ft_vmpage = vmf->page;
- return 0;
- }
-
- if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
- CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", (void *)vmf->address);
- return -EFAULT;
- }
-
- if (cfio->ft_flags & VM_FAULT_OOM) {
- CDEBUG(D_PAGE, "got addr %p - OOM\n", (void *)vmf->address);
- return -ENOMEM;
- }
-
- if (cfio->ft_flags & VM_FAULT_RETRY)
- return -EAGAIN;
-
- CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
- return -EINVAL;
-}
-
-static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
- struct cl_page *page)
-{
- set_page_dirty(page->cp_vmpage);
-}
-
-static int vvp_io_fault_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct vvp_io *vio = cl2vvp_io(env, ios);
- struct cl_io *io = ios->cis_io;
- struct cl_object *obj = io->ci_obj;
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_fault_io *fio = &io->u.ci_fault;
- struct vvp_fault_io *cfio = &vio->u.fault;
- loff_t offset;
- int result = 0;
- struct page *vmpage = NULL;
- struct cl_page *page;
- loff_t size;
- pgoff_t last_index;
-
- down_read(&lli->lli_trunc_sem);
-
- /* offset of the last byte on the page */
- offset = cl_offset(obj, fio->ft_index + 1) - 1;
- LASSERT(cl_index(obj, offset) == fio->ft_index);
- result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
- if (result != 0)
- return result;
-
- /* must return locked page */
- if (fio->ft_mkwrite) {
- LASSERT(cfio->ft_vmpage);
- lock_page(cfio->ft_vmpage);
- } else {
- result = vvp_io_kernel_fault(cfio);
- if (result != 0)
- return result;
- }
-
- vmpage = cfio->ft_vmpage;
- LASSERT(PageLocked(vmpage));
-
- if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
- ll_invalidate_page(vmpage);
-
- size = i_size_read(inode);
- /* Though we have already held a cl_lock upon this page, but
- * it still can be truncated locally.
- */
- if (unlikely((vmpage->mapping != inode->i_mapping) ||
- (page_offset(vmpage) > size))) {
- CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
-
- /* return +1 to stop cl_io_loop() and ll_fault() will catch
- * and retry.
- */
- result = 1;
- goto out;
- }
-
- last_index = cl_index(obj, size - 1);
-
- if (fio->ft_mkwrite) {
- /*
- * Capture the size while holding the lli_trunc_sem from above
- * we want to make sure that we complete the mkwrite action
- * while holding this lock. We need to make sure that we are
- * not past the end of the file.
- */
- if (last_index < fio->ft_index) {
- CDEBUG(D_PAGE,
- "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
- vmpage->mapping, fio->ft_index, last_index);
- /*
- * We need to return if we are
- * passed the end of the file. This will propagate
- * up the call stack to ll_page_mkwrite where
- * we will return VM_FAULT_NOPAGE. Any non-negative
- * value returned here will be silently
- * converted to 0. If the vmpage->mapping is null
- * the error code would be converted back to ENODATA
- * in ll_page_mkwrite0. Thus we return -ENODATA
- * to handle both cases
- */
- result = -ENODATA;
- goto out;
- }
- }
-
- page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE);
- if (IS_ERR(page)) {
- result = PTR_ERR(page);
- goto out;
- }
-
- /* if page is going to be written, we should add this page into cache
- * earlier.
- */
- if (fio->ft_mkwrite) {
- wait_on_page_writeback(vmpage);
- if (!PageDirty(vmpage)) {
- struct cl_page_list *plist = &io->ci_queue.c2_qin;
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
- int to = PAGE_SIZE;
-
- /* vvp_page_assume() calls wait_on_page_writeback(). */
- cl_page_assume(env, io, page);
-
- cl_page_list_init(plist);
- cl_page_list_add(plist, page);
-
- /* size fixup */
- if (last_index == vvp_index(vpg))
- to = size & ~PAGE_MASK;
-
- /* Do not set Dirty bit here so that in case IO is
- * started before the page is really made dirty, we
- * still have chance to detect it.
- */
- result = cl_io_commit_async(env, io, plist, 0, to,
- mkwrite_commit_callback);
- LASSERT(cl_page_is_owned(page, io));
- cl_page_list_fini(env, plist);
-
- vmpage = NULL;
- if (result < 0) {
- cl_page_discard(env, io, page);
- cl_page_disown(env, io, page);
-
- cl_page_put(env, page);
-
- /* we're in big trouble, what can we do now? */
- if (result == -EDQUOT)
- result = -ENOSPC;
- goto out;
- } else {
- cl_page_disown(env, io, page);
- }
- }
- }
-
- /*
- * The ft_index is only used in the case of
- * a mkwrite action. We need to check
- * our assertions are correct, since
- * we should have caught this above
- */
- LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
- if (fio->ft_index == last_index)
- /*
- * Last page is mapped partially.
- */
- fio->ft_nob = size - cl_offset(obj, fio->ft_index);
- else
- fio->ft_nob = cl_page_size(obj);
-
- lu_ref_add(&page->cp_reference, "fault", io);
- fio->ft_page = page;
-
-out:
- /* return unlocked vmpage to avoid deadlocking */
- if (vmpage)
- unlock_page(vmpage);
-
- cfio->ft_flags &= ~VM_FAULT_LOCKED;
-
- return result;
-}
-
-static void vvp_io_fault_end(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- struct inode *inode = vvp_object_inode(ios->cis_obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- CLOBINVRNT(env, ios->cis_io->ci_obj,
- vvp_object_invariant(ios->cis_io->ci_obj));
- up_read(&lli->lli_trunc_sem);
-}
-
-static int vvp_io_fsync_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
-{
- /* we should mark TOWRITE bit to each dirty page in radix tree to
- * verify pages have been written, but this is difficult because of
- * race.
- */
- return 0;
-}
-
-static int vvp_io_read_ahead(const struct lu_env *env,
- const struct cl_io_slice *ios,
- pgoff_t start, struct cl_read_ahead *ra)
-{
- int result = 0;
-
- if (ios->cis_io->ci_type == CIT_READ ||
- ios->cis_io->ci_type == CIT_FAULT) {
- struct vvp_io *vio = cl2vvp_io(env, ios);
-
- if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
- ra->cra_end = CL_PAGE_EOF;
- result = 1; /* no need to call down */
- }
- }
-
- return result;
-}
-
-static const struct cl_io_operations vvp_io_ops = {
- .op = {
- [CIT_READ] = {
- .cio_fini = vvp_io_fini,
- .cio_lock = vvp_io_read_lock,
- .cio_start = vvp_io_read_start,
- .cio_end = vvp_io_rw_end,
- .cio_advance = vvp_io_advance,
- },
- [CIT_WRITE] = {
- .cio_fini = vvp_io_fini,
- .cio_iter_init = vvp_io_write_iter_init,
- .cio_iter_fini = vvp_io_write_iter_fini,
- .cio_lock = vvp_io_write_lock,
- .cio_start = vvp_io_write_start,
- .cio_end = vvp_io_rw_end,
- .cio_advance = vvp_io_advance,
- },
- [CIT_SETATTR] = {
- .cio_fini = vvp_io_setattr_fini,
- .cio_iter_init = vvp_io_setattr_iter_init,
- .cio_lock = vvp_io_setattr_lock,
- .cio_start = vvp_io_setattr_start,
- .cio_end = vvp_io_setattr_end
- },
- [CIT_FAULT] = {
- .cio_fini = vvp_io_fault_fini,
- .cio_iter_init = vvp_io_fault_iter_init,
- .cio_lock = vvp_io_fault_lock,
- .cio_start = vvp_io_fault_start,
- .cio_end = vvp_io_fault_end,
- },
- [CIT_FSYNC] = {
- .cio_start = vvp_io_fsync_start,
- .cio_fini = vvp_io_fini
- },
- [CIT_MISC] = {
- .cio_fini = vvp_io_fini
- }
- },
- .cio_read_ahead = vvp_io_read_ahead,
-};
-
-int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io)
-{
- struct vvp_io *vio = vvp_env_io(env);
- struct inode *inode = vvp_object_inode(obj);
- int result;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- CDEBUG(D_VFSTRACE, DFID
- " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
- PFID(lu_object_fid(&obj->co_lu)),
- io->ci_ignore_layout, io->ci_verify_layout,
- vio->vui_layout_gen, io->ci_restore_needed);
-
- CL_IO_SLICE_CLEAN(vio, vui_cl);
- cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
- vio->vui_ra_valid = false;
- result = 0;
- if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
- size_t count;
- struct ll_inode_info *lli = ll_i2info(inode);
-
- count = io->u.ci_rw.crw_count;
- /* "If nbyte is 0, read() will return 0 and have no other
- * results." -- Single Unix Spec
- */
- if (count == 0)
- result = 1;
- else
- vio->vui_tot_count = count;
-
- /* for read/write, we store the jobid in the inode, and
- * it'll be fetched by osc when building RPC.
- *
- * it's not accurate if the file is shared by different
- * jobs.
- */
- lustre_get_jobid(lli->lli_jobid);
- } else if (io->ci_type == CIT_SETATTR) {
- if (!cl_io_is_trunc(io))
- io->ci_lockreq = CILR_MANDATORY;
- }
-
- /* Enqueue layout lock and get layout version. We need to do this
- * even for operations requiring to open file, such as read and write,
- * because it might not grant layout lock in IT_OPEN.
- */
- if (result == 0 && !io->ci_ignore_layout) {
- result = ll_layout_refresh(inode, &vio->vui_layout_gen);
- if (result == -ENOENT)
- /* If the inode on MDS has been removed, but the objects
- * on OSTs haven't been destroyed (async unlink), layout
- * fetch will return -ENOENT, we'd ignore this error
- * and continue with dirty flush. LU-3230.
- */
- result = 0;
- if (result < 0)
- CERROR("%s: refresh file layout " DFID " error %d.\n",
- ll_get_fsname(inode->i_sb, NULL, 0),
- PFID(lu_object_fid(&obj->co_lu)), result);
- }
-
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
deleted file mode 100644
index 4b6c7143bd2c..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2014, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_lock for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Vvp lock functions.
- *
- */
-
-static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice)
-{
- struct vvp_lock *vlk = cl2vvp_lock(slice);
-
- kmem_cache_free(vvp_lock_kmem, vlk);
-}
-
-static int vvp_lock_enqueue(const struct lu_env *env,
- const struct cl_lock_slice *slice,
- struct cl_io *unused, struct cl_sync_io *anchor)
-{
- CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj));
-
- return 0;
-}
-
-static const struct cl_lock_operations vvp_lock_ops = {
- .clo_fini = vvp_lock_fini,
- .clo_enqueue = vvp_lock_enqueue,
-};
-
-int vvp_lock_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_lock *lock, const struct cl_io *unused)
-{
- struct vvp_lock *vlk;
- int result;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS);
- if (vlk) {
- cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops);
- result = 0;
- } else {
- result = -ENOMEM;
- }
- return result;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
deleted file mode 100644
index 05ad3b322a29..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ /dev/null
@@ -1,305 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * cl_object implementation for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/libcfs/libcfs.h>
-
-#include <obd.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Object operations.
- *
- */
-
-int vvp_object_invariant(const struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
- struct ll_inode_info *lli = ll_i2info(inode);
-
- return (S_ISREG(inode->i_mode) || inode->i_mode == 0) &&
- lli->lli_clob == obj;
-}
-
-static int vvp_object_print(const struct lu_env *env, void *cookie,
- lu_printer_t p, const struct lu_object *o)
-{
- struct vvp_object *obj = lu2vvp(o);
- struct inode *inode = obj->vob_inode;
- struct ll_inode_info *lli;
-
- (*p)(env, cookie, "(%d %d) inode: %p ",
- atomic_read(&obj->vob_transient_pages),
- atomic_read(&obj->vob_mmap_cnt), inode);
- if (inode) {
- lli = ll_i2info(inode);
- (*p)(env, cookie, "%lu/%u %o %u %d %p " DFID,
- inode->i_ino, inode->i_generation, inode->i_mode,
- inode->i_nlink, atomic_read(&inode->i_count),
- lli->lli_clob, PFID(&lli->lli_fid));
- }
- return 0;
-}
-
-static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
- struct cl_attr *attr)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- /*
- * lov overwrites most of these fields in
- * lov_attr_get()->...lov_merge_lvb_kms(), except when inode
- * attributes are newer.
- */
-
- attr->cat_size = i_size_read(inode);
- attr->cat_mtime = inode->i_mtime.tv_sec;
- attr->cat_atime = inode->i_atime.tv_sec;
- attr->cat_ctime = inode->i_ctime.tv_sec;
- attr->cat_blocks = inode->i_blocks;
- attr->cat_uid = from_kuid(&init_user_ns, inode->i_uid);
- attr->cat_gid = from_kgid(&init_user_ns, inode->i_gid);
- /* KMS is not known by this layer */
- return 0; /* layers below have to fill in the rest */
-}
-
-static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj,
- const struct cl_attr *attr, unsigned int valid)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- if (valid & CAT_UID)
- inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid);
- if (valid & CAT_GID)
- inode->i_gid = make_kgid(&init_user_ns, attr->cat_gid);
- if (valid & CAT_ATIME)
- inode->i_atime.tv_sec = attr->cat_atime;
- if (valid & CAT_MTIME)
- inode->i_mtime.tv_sec = attr->cat_mtime;
- if (valid & CAT_CTIME)
- inode->i_ctime.tv_sec = attr->cat_ctime;
- if (0 && valid & CAT_SIZE)
- i_size_write(inode, attr->cat_size);
- /* not currently necessary */
- if (0 && valid & (CAT_UID | CAT_GID | CAT_SIZE))
- mark_inode_dirty(inode);
- return 0;
-}
-
-static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
- const struct cl_object_conf *conf)
-{
- struct ll_inode_info *lli = ll_i2info(conf->coc_inode);
-
- if (conf->coc_opc == OBJECT_CONF_INVALIDATE) {
- CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n",
- PFID(&lli->lli_fid));
-
- ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
-
- /* Clean up page mmap for this inode.
- * The reason for us to do this is that if the page has
- * already been installed into memory space, the process
- * can access it without interacting with lustre, so this
- * page may be stale due to layout change, and the process
- * will never be notified.
- * This operation is expensive but mmap processes have to pay
- * a price themselves.
- */
- unmap_mapping_range(conf->coc_inode->i_mapping,
- 0, OBD_OBJECT_EOF, 0);
- }
-
- return 0;
-}
-
-static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
-{
- struct inode *inode = vvp_object_inode(obj);
- int rc;
-
- rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
- if (rc < 0) {
- CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n",
- PFID(lu_object_fid(&obj->co_lu)), rc);
- return rc;
- }
-
- truncate_inode_pages(inode->i_mapping, 0);
- return 0;
-}
-
-static int vvp_object_glimpse(const struct lu_env *env,
- const struct cl_object *obj, struct ost_lvb *lvb)
-{
- struct inode *inode = vvp_object_inode(obj);
-
- lvb->lvb_mtime = LTIME_S(inode->i_mtime);
- lvb->lvb_atime = LTIME_S(inode->i_atime);
- lvb->lvb_ctime = LTIME_S(inode->i_ctime);
- /*
- * LU-417: Add dirty pages block count lest i_blocks reports 0, some
- * "cp" or "tar" on remote node may think it's a completely sparse file
- * and skip it.
- */
- if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0)
- lvb->lvb_blocks = dirty_cnt(inode);
- return 0;
-}
-
-static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj,
- struct cl_req_attr *attr)
-{
- u64 valid_flags = OBD_MD_FLTYPE;
- struct inode *inode;
- struct obdo *oa;
-
- oa = attr->cra_oa;
- inode = vvp_object_inode(obj);
-
- if (attr->cra_type == CRT_WRITE)
- valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
- OBD_MD_FLUID | OBD_MD_FLGID;
- obdo_from_inode(oa, inode, valid_flags & attr->cra_flags);
- obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
- if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
- oa->o_parent_oid++;
- memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE);
-}
-
-static const struct cl_object_operations vvp_ops = {
- .coo_page_init = vvp_page_init,
- .coo_lock_init = vvp_lock_init,
- .coo_io_init = vvp_io_init,
- .coo_attr_get = vvp_attr_get,
- .coo_attr_update = vvp_attr_update,
- .coo_conf_set = vvp_conf_set,
- .coo_prune = vvp_prune,
- .coo_glimpse = vvp_object_glimpse,
- .coo_req_attr_set = vvp_req_attr_set
-};
-
-static int vvp_object_init0(const struct lu_env *env,
- struct vvp_object *vob,
- const struct cl_object_conf *conf)
-{
- vob->vob_inode = conf->coc_inode;
- atomic_set(&vob->vob_transient_pages, 0);
- cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
- return 0;
-}
-
-static int vvp_object_init(const struct lu_env *env, struct lu_object *obj,
- const struct lu_object_conf *conf)
-{
- struct vvp_device *dev = lu2vvp_dev(obj->lo_dev);
- struct vvp_object *vob = lu2vvp(obj);
- struct lu_object *below;
- struct lu_device *under;
- int result;
-
- under = &dev->vdv_next->cd_lu_dev;
- below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under);
- if (below) {
- const struct cl_object_conf *cconf;
-
- cconf = lu2cl_conf(conf);
- lu_object_add(obj, below);
- result = vvp_object_init0(env, vob, cconf);
- } else {
- result = -ENOMEM;
- }
-
- return result;
-}
-
-static void vvp_object_free(const struct lu_env *env, struct lu_object *obj)
-{
- struct vvp_object *vob = lu2vvp(obj);
-
- lu_object_fini(obj);
- lu_object_header_fini(obj->lo_header);
- kmem_cache_free(vvp_object_kmem, vob);
-}
-
-static const struct lu_object_operations vvp_lu_obj_ops = {
- .loo_object_init = vvp_object_init,
- .loo_object_free = vvp_object_free,
- .loo_object_print = vvp_object_print,
-};
-
-struct vvp_object *cl_inode2vvp(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct cl_object *obj = lli->lli_clob;
- struct lu_object *lu;
-
- lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type);
- LASSERT(lu);
- return lu2vvp(lu);
-}
-
-struct lu_object *vvp_object_alloc(const struct lu_env *env,
- const struct lu_object_header *unused,
- struct lu_device *dev)
-{
- struct vvp_object *vob;
- struct lu_object *obj;
-
- vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS);
- if (vob) {
- struct cl_object_header *hdr;
-
- obj = &vob->vob_cl.co_lu;
- hdr = &vob->vob_header;
- cl_object_header_init(hdr);
- hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
-
- lu_object_init(obj, &hdr->coh_lu, dev);
- lu_object_add_top(&hdr->coh_lu, obj);
-
- vob->vob_cl.co_ops = &vvp_ops;
- obj->lo_ops = &vvp_lu_obj_ops;
- } else {
- obj = NULL;
- }
- return obj;
-}
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
deleted file mode 100644
index 6eb0565ddc22..000000000000
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * Implementation of cl_page for VVP layer.
- *
- * Author: Nikita Danilov <nikita.danilov@sun.com>
- * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/atomic.h>
-#include <linux/bitops.h>
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/page-flags.h>
-#include <linux/pagemap.h>
-
-#include "llite_internal.h"
-#include "vvp_internal.h"
-
-/*****************************************************************************
- *
- * Page operations.
- *
- */
-
-static void vvp_page_fini_common(struct vvp_page *vpg)
-{
- struct page *vmpage = vpg->vpg_page;
-
- LASSERT(vmpage);
- put_page(vmpage);
-}
-
-static void vvp_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- /*
- * vmpage->private was already cleared when page was moved into
- * VPG_FREEING state.
- */
- LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
- vvp_page_fini_common(vpg);
-}
-
-static int vvp_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io,
- int nonblock)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- LASSERT(vmpage);
- if (nonblock) {
- if (!trylock_page(vmpage))
- return -EAGAIN;
-
- if (unlikely(PageWriteback(vmpage))) {
- unlock_page(vmpage);
- return -EAGAIN;
- }
-
- return 0;
- }
-
- lock_page(vmpage);
- wait_on_page_writeback(vmpage);
-
- return 0;
-}
-
-static void vvp_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
- wait_on_page_writeback(vmpage);
-}
-
-static void vvp_page_unassume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-}
-
-static void vvp_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- unlock_page(cl2vm_page(slice));
-}
-
-static void vvp_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct vvp_page *vpg = cl2vvp_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
-
- if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
- ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
-
- ll_invalidate_page(vmpage);
-}
-
-static void vvp_page_delete(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct inode *inode = vmpage->mapping->host;
- struct cl_object *obj = slice->cpl_obj;
- struct cl_page *page = slice->cpl_page;
- int refc;
-
- LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == page);
- LASSERT(inode == vvp_object_inode(obj));
-
- /* Drop the reference count held in vvp_page_init */
- refc = atomic_dec_return(&page->cp_ref);
- LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
-
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
- /*
- * Reference from vmpage to cl_page is removed, but the reference back
- * is still here. It is removed later in vvp_page_fini().
- */
-}
-
-static void vvp_page_export(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int uptodate)
-{
- struct page *vmpage = cl2vm_page(slice);
-
- LASSERT(vmpage);
- LASSERT(PageLocked(vmpage));
- if (uptodate)
- SetPageUptodate(vmpage);
- else
- ClearPageUptodate(vmpage);
-}
-
-static int vvp_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA;
-}
-
-static int vvp_page_prep_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* Skip the page already marked as PG_uptodate. */
- return PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0;
-}
-
-static int vvp_page_prep_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *pg = slice->cpl_page;
-
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageDirty(vmpage));
-
- /* ll_writepage path is not a sync write, so need to set page writeback
- * flag
- */
- if (!pg->cp_sync_io)
- set_page_writeback(vmpage);
-
- return 0;
-}
-
-/**
- * Handles page transfer errors at VM level.
- *
- * This takes inode as a separate argument, because inode on which error is to
- * be set can be different from \a vmpage inode in case of direct-io.
- */
-static void vvp_vmpage_error(struct inode *inode, struct page *vmpage,
- int ioret)
-{
- struct vvp_object *obj = cl_inode2vvp(inode);
-
- if (ioret == 0) {
- ClearPageError(vmpage);
- obj->vob_discard_page_warned = 0;
- } else {
- SetPageError(vmpage);
- mapping_set_error(inode->i_mapping, ioret);
-
- if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
- obj->vob_discard_page_warned == 0) {
- obj->vob_discard_page_warned = 1;
- ll_dirty_page_discard_warn(vmpage, ioret);
- }
- }
-}
-
-static void vvp_page_completion_read(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
- struct cl_page *page = slice->cpl_page;
- struct inode *inode = vvp_object_inode(page->cp_obj);
-
- LASSERT(PageLocked(vmpage));
- CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);
-
- if (vpg->vpg_defer_uptodate)
- ll_ra_count_put(ll_i2sbi(inode), 1);
-
- if (ioret == 0) {
- if (!vpg->vpg_defer_uptodate)
- cl_page_export(env, page, 1);
- } else {
- vpg->vpg_defer_uptodate = 0;
- }
-
- if (!page->cp_sync_io)
- unlock_page(vmpage);
-}
-
-static void vvp_page_completion_write(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *pg = slice->cpl_page;
- struct page *vmpage = vpg->vpg_page;
-
- CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);
-
- if (pg->cp_sync_io) {
- LASSERT(PageLocked(vmpage));
- LASSERT(!PageWriteback(vmpage));
- } else {
- LASSERT(PageWriteback(vmpage));
- /*
- * Only mark the page error only when it's an async write
- * because applications won't wait for IO to finish.
- */
- vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);
-
- end_page_writeback(vmpage);
- }
-}
-
-/**
- * Implements cl_page_operations::cpo_make_ready() method.
- *
- * This is called to yank a page from the transfer cache and to send it out as
- * a part of transfer. This function try-locks the page. If try-lock failed,
- * page is owned by some concurrent IO, and should be skipped (this is bad,
- * but hopefully rare situation, as it usually results in transfer being
- * shorter than possible).
- *
- * \retval 0 success, page can be placed into transfer
- *
- * \retval -EAGAIN page is either used by concurrent IO has been
- * truncated. Skip it.
- */
-static int vvp_page_make_ready(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *pg = slice->cpl_page;
- int result = 0;
-
- lock_page(vmpage);
- if (clear_page_dirty_for_io(vmpage)) {
- LASSERT(pg->cp_state == CPS_CACHED);
- /* This actually clears the dirty bit in the radix tree. */
- set_page_writeback(vmpage);
- CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
- } else if (pg->cp_state == CPS_PAGEOUT) {
- /* is it possible for osc_flush_async_page() to already
- * make it ready?
- */
- result = -EALREADY;
- } else {
- CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpecting page state %d.\n",
- pg->cp_state);
- LBUG();
- }
- unlock_page(vmpage);
- return result;
-}
-
-static int vvp_page_print(const struct lu_env *env,
- const struct cl_page_slice *slice,
- void *cookie, lu_printer_t printer)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct page *vmpage = vpg->vpg_page;
-
- (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d) vm@%p ",
- vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, vmpage);
- if (vmpage) {
- (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
- (long)vmpage->flags, page_count(vmpage),
- page_mapcount(vmpage), vmpage->private,
- vmpage->index,
- list_empty(&vmpage->lru) ? "not-" : "");
- }
-
- (*printer)(env, cookie, "\n");
-
- return 0;
-}
-
-static int vvp_page_fail(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- /*
- * Cached read?
- */
- LBUG();
-
- return 0;
-}
-
-static const struct cl_page_operations vvp_page_ops = {
- .cpo_own = vvp_page_own,
- .cpo_assume = vvp_page_assume,
- .cpo_unassume = vvp_page_unassume,
- .cpo_disown = vvp_page_disown,
- .cpo_discard = vvp_page_discard,
- .cpo_delete = vvp_page_delete,
- .cpo_export = vvp_page_export,
- .cpo_is_vmlocked = vvp_page_is_vmlocked,
- .cpo_fini = vvp_page_fini,
- .cpo_print = vvp_page_print,
- .io = {
- [CRT_READ] = {
- .cpo_prep = vvp_page_prep_read,
- .cpo_completion = vvp_page_completion_read,
- .cpo_make_ready = vvp_page_fail,
- },
- [CRT_WRITE] = {
- .cpo_prep = vvp_page_prep_write,
- .cpo_completion = vvp_page_completion_write,
- .cpo_make_ready = vvp_page_make_ready,
- },
- },
-};
-
-static int vvp_transient_page_prep(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- /* transient page should always be sent. */
- return 0;
-}
-
-static int vvp_transient_page_own(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused, int nonblock)
-{
- return 0;
-}
-
-static void vvp_transient_page_assume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_unassume(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_disown(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
-}
-
-static void vvp_transient_page_discard(const struct lu_env *env,
- const struct cl_page_slice *slice,
- struct cl_io *unused)
-{
- struct cl_page *page = slice->cpl_page;
-
- /*
- * For transient pages, remove it from the radix tree.
- */
- cl_page_delete(env, page);
-}
-
-static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct inode *inode = vvp_object_inode(slice->cpl_obj);
- int locked;
-
- locked = !inode_trylock(inode);
- if (!locked)
- inode_unlock(inode);
- return locked ? -EBUSY : -ENODATA;
-}
-
-static void
-vvp_transient_page_completion(const struct lu_env *env,
- const struct cl_page_slice *slice,
- int ioret)
-{
-}
-
-static void vvp_transient_page_fini(const struct lu_env *env,
- struct cl_page_slice *slice)
-{
- struct vvp_page *vpg = cl2vvp_page(slice);
- struct cl_page *clp = slice->cpl_page;
- struct vvp_object *clobj = cl2vvp(clp->cp_obj);
-
- vvp_page_fini_common(vpg);
- atomic_dec(&clobj->vob_transient_pages);
-}
-
-static const struct cl_page_operations vvp_transient_page_ops = {
- .cpo_own = vvp_transient_page_own,
- .cpo_assume = vvp_transient_page_assume,
- .cpo_unassume = vvp_transient_page_unassume,
- .cpo_disown = vvp_transient_page_disown,
- .cpo_discard = vvp_transient_page_discard,
- .cpo_fini = vvp_transient_page_fini,
- .cpo_is_vmlocked = vvp_transient_page_is_vmlocked,
- .cpo_print = vvp_page_print,
- .io = {
- [CRT_READ] = {
- .cpo_prep = vvp_transient_page_prep,
- .cpo_completion = vvp_transient_page_completion,
- },
- [CRT_WRITE] = {
- .cpo_prep = vvp_transient_page_prep,
- .cpo_completion = vvp_transient_page_completion,
- }
- }
-};
-
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
- struct cl_page *page, pgoff_t index)
-{
- struct vvp_page *vpg = cl_object_page_slice(obj, page);
- struct page *vmpage = page->cp_vmpage;
-
- CLOBINVRNT(env, obj, vvp_object_invariant(obj));
-
- vpg->vpg_page = vmpage;
- get_page(vmpage);
-
- if (page->cp_type == CPT_CACHEABLE) {
- /* in cache, decref in vvp_page_delete */
- atomic_inc(&page->cp_ref);
- SetPagePrivate(vmpage);
- vmpage->private = (unsigned long)page;
- cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
- &vvp_page_ops);
- } else {
- struct vvp_object *clobj = cl2vvp(obj);
-
- cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
- &vvp_transient_page_ops);
- atomic_inc(&clobj->vob_transient_pages);
- }
- return 0;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
deleted file mode 100644
index 2d78432963dc..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ /dev/null
@@ -1,638 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/xattr.h>
-#include <linux/selinux.h>
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <obd_support.h>
-#include <lustre_dlm.h>
-
-#include "llite_internal.h"
-
-const struct xattr_handler *get_xattr_type(const char *name)
-{
- int i = 0;
-
- while (ll_xattr_handlers[i]) {
- size_t len = strlen(ll_xattr_handlers[i]->prefix);
-
- if (!strncmp(ll_xattr_handlers[i]->prefix, name, len))
- return ll_xattr_handlers[i];
- i++;
- }
- return NULL;
-}
-
-static int xattr_type_filter(struct ll_sb_info *sbi,
- const struct xattr_handler *handler)
-{
- /* No handler means XATTR_OTHER_T */
- if (!handler)
- return -EOPNOTSUPP;
-
- if ((handler->flags == XATTR_ACL_ACCESS_T ||
- handler->flags == XATTR_ACL_DEFAULT_T) &&
- !(sbi->ll_flags & LL_SBI_ACL))
- return -EOPNOTSUPP;
-
- if (handler->flags == XATTR_USER_T &&
- !(sbi->ll_flags & LL_SBI_USER_XATTR))
- return -EOPNOTSUPP;
-
- if (handler->flags == XATTR_TRUSTED_T &&
- !capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return 0;
-}
-
-static int
-ll_xattr_set_common(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size,
- int flags)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *pv = value;
- char *fullname;
- __u64 valid;
- int rc;
-
- if (flags == XATTR_REPLACE) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
- valid = OBD_MD_FLXATTRRM;
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
- valid = OBD_MD_FLXATTR;
- }
-
- rc = xattr_type_filter(sbi, handler);
- if (rc)
- return rc;
-
- if ((handler->flags == XATTR_ACL_ACCESS_T ||
- handler->flags == XATTR_ACL_DEFAULT_T) &&
- !inode_owner_or_capable(inode))
- return -EPERM;
-
- /* b10667: ignore lustre special xattr for now */
- if (!strcmp(name, "hsm") ||
- ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
- (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))))
- return 0;
-
- /* b15587: ignore security.capability xattr for now */
- if ((handler->flags == XATTR_SECURITY_T &&
- !strcmp(name, "capability")))
- return 0;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
- strcmp(name, "selinux") == 0)
- return -EOPNOTSUPP;
-
- /*FIXME: enable IMA when the conditions are ready */
- if (handler->flags == XATTR_SECURITY_T &&
- (!strcmp(name, "ima") || !strcmp(name, "evm")))
- return -EOPNOTSUPP;
-
- /*
- * In user.* namespace, only regular files and directories can have
- * extended attributes.
- */
- if (handler->flags == XATTR_USER_T) {
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return -EPERM;
- }
-
- fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name);
- if (!fullname)
- return -ENOMEM;
- rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid, fullname, pv, size, 0, flags,
- ll_i2suppgid(inode), &req);
- kfree(fullname);
- if (rc) {
- if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) {
- LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
- return rc;
- }
-
- ptlrpc_req_finished(req);
- return 0;
-}
-
-static int get_hsm_state(struct inode *inode, u32 *hus_states)
-{
- struct md_op_data *op_data;
- struct hsm_user_state *hus;
- int rc;
-
- hus = kzalloc(sizeof(*hus), GFP_NOFS);
- if (!hus)
- return -ENOMEM;
-
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, hus);
- if (!IS_ERR(op_data)) {
- rc = obd_iocontrol(LL_IOC_HSM_STATE_GET, ll_i2mdexp(inode),
- sizeof(*op_data), op_data, NULL);
- if (!rc)
- *hus_states = hus->hus_states;
- else
- CDEBUG(D_VFSTRACE, "obd_iocontrol failed. rc = %d\n",
- rc);
-
- ll_finish_md_op_data(op_data);
- } else {
- rc = PTR_ERR(op_data);
- CDEBUG(D_VFSTRACE, "Could not prepare the opdata. rc = %d\n",
- rc);
- }
- kfree(hus);
- return rc;
-}
-
-static int ll_xattr_set(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, const void *value, size_t size,
- int flags)
-{
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- if (!strcmp(name, "lov")) {
- struct lov_user_md *lump = (struct lov_user_md *)value;
- int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
- LPROC_LL_SETXATTR;
- int rc = 0;
-
- ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
-
- if (size != 0 && size < sizeof(struct lov_user_md))
- return -EINVAL;
-
- /*
- * It is possible to set an xattr to a "" value of zero size.
- * For this case we are going to treat it as a removal.
- */
- if (!size && lump)
- lump = NULL;
-
- /* Attributes that are saved via getxattr will always have
- * the stripe_offset as 0. Instead, the MDS should be
- * allowed to pick the starting OST index. b=17846
- */
- if (lump && lump->lmm_stripe_offset == 0)
- lump->lmm_stripe_offset = -1;
-
- /* Avoid anyone directly setting the RELEASED flag. */
- if (lump && (lump->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
- /* Only if we have a released flag check if the file
- * was indeed archived.
- */
- u32 state = HS_NONE;
-
- rc = get_hsm_state(inode, &state);
- if (rc)
- return rc;
-
- if (!(state & HS_ARCHIVED)) {
- CDEBUG(D_VFSTRACE,
- "hus_states state = %x, pattern = %x\n",
- state, lump->lmm_pattern);
- /*
- * Here the state is: real file is not
- * archived but user is requesting to set
- * the RELEASED flag so we mask off the
- * released flag from the request
- */
- lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED;
- }
- }
-
- if (lump && S_ISREG(inode->i_mode)) {
- __u64 it_flags = FMODE_WRITE;
- int lum_size;
-
- lum_size = ll_lov_user_md_size(lump);
- if (lum_size < 0 || size < lum_size)
- return 0; /* b=10667: ignore error */
-
- rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
- lump, lum_size);
- /* b=10667: rc always be 0 here for now */
- rc = 0;
- } else if (S_ISDIR(inode->i_mode)) {
- rc = ll_dir_setstripe(inode, lump, 0);
- }
-
- return rc;
-
- } else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
- return 0;
- }
-
- return ll_xattr_set_common(handler, dentry, inode, name, value, size,
- flags);
-}
-
-int
-ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
- size_t size, __u64 valid)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- struct mdt_body *body;
- void *xdata;
- int rc;
-
- if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T &&
- (type != XATTR_SECURITY_T || strcmp(name, "security.selinux"))) {
- rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
- if (rc == -EAGAIN)
- goto getxattr_nocache;
- if (rc < 0)
- goto out_xattr;
-
- /* Add "system.posix_acl_access" to the list */
- if (lli->lli_posix_acl && valid & OBD_MD_FLXATTRLS) {
- if (size == 0) {
- rc += sizeof(XATTR_NAME_ACL_ACCESS);
- } else if (size - rc >= sizeof(XATTR_NAME_ACL_ACCESS)) {
- memcpy(buffer + rc, XATTR_NAME_ACL_ACCESS,
- sizeof(XATTR_NAME_ACL_ACCESS));
- rc += sizeof(XATTR_NAME_ACL_ACCESS);
- } else {
- rc = -ERANGE;
- goto out_xattr;
- }
- }
- } else {
-getxattr_nocache:
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid, name, NULL, 0, size, 0, &req);
- if (rc < 0)
- goto out_xattr;
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body);
-
- /* only detect the xattr size */
- if (size == 0) {
- rc = body->mbo_eadatasize;
- goto out;
- }
-
- if (size < body->mbo_eadatasize) {
- CERROR("server bug: replied size %u > %u\n",
- body->mbo_eadatasize, (int)size);
- rc = -ERANGE;
- goto out;
- }
-
- if (body->mbo_eadatasize == 0) {
- rc = -ENODATA;
- goto out;
- }
-
- /* do not need swab xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->mbo_eadatasize);
- if (!xdata) {
- rc = -EFAULT;
- goto out;
- }
-
- memcpy(buffer, xdata, body->mbo_eadatasize);
- rc = body->mbo_eadatasize;
- }
-
-out_xattr:
- if (rc == -EOPNOTSUPP && type == XATTR_USER_T) {
- LCONSOLE_INFO(
- "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
- ll_get_fsname(inode->i_sb, NULL, 0), rc);
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
- }
-out:
- ptlrpc_req_finished(req);
- return rc;
-}
-
-static int ll_xattr_get_common(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
-#ifdef CONFIG_FS_POSIX_ACL
- struct ll_inode_info *lli = ll_i2info(inode);
-#endif
- char *fullname;
- int rc;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- rc = xattr_type_filter(sbi, handler);
- if (rc)
- return rc;
-
- /* b15587: ignore security.capability xattr for now */
- if ((handler->flags == XATTR_SECURITY_T && !strcmp(name, "capability")))
- return -ENODATA;
-
- /* LU-549: Disable security.selinux when selinux is disabled */
- if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
- !strcmp(name, "selinux"))
- return -EOPNOTSUPP;
-
-#ifdef CONFIG_FS_POSIX_ACL
- /* posix acl is under protection of LOOKUP lock. when calling to this,
- * we just have path resolution to the target inode, so we have great
- * chance that cached ACL is uptodate.
- */
- if (handler->flags == XATTR_ACL_ACCESS_T) {
- struct posix_acl *acl;
-
- spin_lock(&lli->lli_lock);
- acl = posix_acl_dup(lli->lli_posix_acl);
- spin_unlock(&lli->lli_lock);
-
- if (!acl)
- return -ENODATA;
-
- rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
- return rc;
- }
- if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
- return -ENODATA;
-#endif
- fullname = kasprintf(GFP_KERNEL, "%s%s\n", handler->prefix, name);
- if (!fullname)
- return -ENOMEM;
- rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size,
- OBD_MD_FLXATTR);
- kfree(fullname);
- return rc;
-}
-
-static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size)
-{
- ssize_t rc;
-
- if (S_ISREG(inode->i_mode)) {
- struct cl_object *obj = ll_i2info(inode)->lli_clob;
- struct cl_layout cl = {
- .cl_buf.lb_buf = buf,
- .cl_buf.lb_len = buf_size,
- };
- struct lu_env *env;
- u16 refcheck;
-
- if (!obj)
- return -ENODATA;
-
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- return PTR_ERR(env);
-
- rc = cl_object_layout_get(env, obj, &cl);
- if (rc < 0)
- goto out_env;
-
- if (!cl.cl_size) {
- rc = -ENODATA;
- goto out_env;
- }
-
- rc = cl.cl_size;
-
- if (!buf_size)
- goto out_env;
-
- LASSERT(buf && rc <= buf_size);
-
- /*
- * Do not return layout gen for getxattr() since
- * otherwise it would confuse tar --xattr by
- * recognizing layout gen as stripe offset when the
- * file is restored. See LU-2809.
- */
- ((struct lov_mds_md *)buf)->lmm_layout_gen = 0;
-out_env:
- cl_env_put(env, &refcheck);
-
- return rc;
- } else if (S_ISDIR(inode->i_mode)) {
- struct ptlrpc_request *req = NULL;
- struct lov_mds_md *lmm = NULL;
- int lmm_size = 0;
-
- rc = ll_dir_getstripe(inode, (void **)&lmm, &lmm_size,
- &req, 0);
- if (rc < 0)
- goto out_req;
-
- if (!buf_size) {
- rc = lmm_size;
- goto out_req;
- }
-
- if (buf_size < lmm_size) {
- rc = -ERANGE;
- goto out_req;
- }
-
- memcpy(buf, lmm, lmm_size);
- rc = lmm_size;
-out_req:
- if (req)
- ptlrpc_req_finished(req);
-
- return rc;
- } else {
- return -ENODATA;
- }
-}
-
-static int ll_xattr_get(const struct xattr_handler *handler,
- struct dentry *dentry, struct inode *inode,
- const char *name, void *buffer, size_t size)
-{
- LASSERT(inode);
- LASSERT(name);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
- PFID(ll_inode2fid(inode)), inode, name);
-
- if (!strcmp(name, "lov")) {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
- return ll_getxattr_lov(inode, buffer, size);
- }
-
- return ll_xattr_get_common(handler, dentry, inode, name, buffer, size);
-}
-
-ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct inode *inode = d_inode(dentry);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- char *xattr_name;
- ssize_t rc, rc2;
- size_t len, rem;
-
- LASSERT(inode);
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n",
- PFID(ll_inode2fid(inode)), inode);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
-
- rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size,
- OBD_MD_FLXATTRLS);
- if (rc < 0)
- return rc;
- /*
- * If we're being called to get the size of the xattr list
- * (buf_size == 0) then just assume that a lustre.lov xattr
- * exists.
- */
- if (!size)
- return rc + sizeof(XATTR_LUSTRE_LOV);
-
- xattr_name = buffer;
- rem = rc;
-
- while (rem > 0) {
- len = strnlen(xattr_name, rem - 1) + 1;
- rem -= len;
- if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) {
- /* Skip OK xattr type leave it in buffer */
- xattr_name += len;
- continue;
- }
-
- /*
- * Move up remaining xattrs in buffer
- * removing the xattr that is not OK
- */
- memmove(xattr_name, xattr_name + len, rem);
- rc -= len;
- }
-
- rc2 = ll_getxattr_lov(inode, NULL, 0);
- if (rc2 == -ENODATA)
- return rc;
-
- if (rc2 < 0)
- return rc2;
-
- if (size < rc + sizeof(XATTR_LUSTRE_LOV))
- return -ERANGE;
-
- memcpy(buffer + rc, XATTR_LUSTRE_LOV, sizeof(XATTR_LUSTRE_LOV));
-
- return rc + sizeof(XATTR_LUSTRE_LOV);
-}
-
-static const struct xattr_handler ll_user_xattr_handler = {
- .prefix = XATTR_USER_PREFIX,
- .flags = XATTR_USER_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_trusted_xattr_handler = {
- .prefix = XATTR_TRUSTED_PREFIX,
- .flags = XATTR_TRUSTED_T,
- .get = ll_xattr_get,
- .set = ll_xattr_set,
-};
-
-static const struct xattr_handler ll_security_xattr_handler = {
- .prefix = XATTR_SECURITY_PREFIX,
- .flags = XATTR_SECURITY_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_acl_access_xattr_handler = {
- .prefix = XATTR_NAME_POSIX_ACL_ACCESS,
- .flags = XATTR_ACL_ACCESS_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_acl_default_xattr_handler = {
- .prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
- .flags = XATTR_ACL_DEFAULT_T,
- .get = ll_xattr_get_common,
- .set = ll_xattr_set_common,
-};
-
-static const struct xattr_handler ll_lustre_xattr_handler = {
- .prefix = XATTR_LUSTRE_PREFIX,
- .flags = XATTR_LUSTRE_T,
- .get = ll_xattr_get,
- .set = ll_xattr_set,
-};
-
-const struct xattr_handler *ll_xattr_handlers[] = {
- &ll_user_xattr_handler,
- &ll_trusted_xattr_handler,
- &ll_security_xattr_handler,
-#ifdef CONFIG_FS_POSIX_ACL
- &ll_acl_access_xattr_handler,
- &ll_acl_default_xattr_handler,
-#endif
- &ll_lustre_xattr_handler,
- NULL,
-};
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
deleted file mode 100644
index 4dc799d60a9f..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ /dev/null
@@ -1,523 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Copyright (c) 2013, 2015, Intel Corporation.
- *
- * Author: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
- *
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <obd_support.h>
-#include <lustre_dlm.h>
-#include "llite_internal.h"
-
-/* If we ever have hundreds of extended attributes, we might want to consider
- * using a hash or a tree structure instead of list for faster lookups.
- */
-struct ll_xattr_entry {
- struct list_head xe_list; /* protected with
- * lli_xattrs_list_rwsem
- */
- char *xe_name; /* xattr name, \0-terminated */
- char *xe_value; /* xattr value */
- unsigned int xe_namelen; /* strlen(xe_name) + 1 */
- unsigned int xe_vallen; /* xattr value length */
-};
-
-static struct kmem_cache *xattr_kmem;
-static struct lu_kmem_descr xattr_caches[] = {
- {
- .ckd_cache = &xattr_kmem,
- .ckd_name = "xattr_kmem",
- .ckd_size = sizeof(struct ll_xattr_entry)
- },
- {
- .ckd_cache = NULL
- }
-};
-
-int ll_xattr_init(void)
-{
- return lu_kmem_init(xattr_caches);
-}
-
-void ll_xattr_fini(void)
-{
- lu_kmem_fini(xattr_caches);
-}
-
-/**
- * Initializes xattr cache for an inode.
- *
- * This initializes the xattr list and marks cache presence.
- */
-static void ll_xattr_cache_init(struct ll_inode_info *lli)
-{
- INIT_LIST_HEAD(&lli->lli_xattrs);
- set_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-}
-
-/**
- * This looks for a specific extended attribute.
- *
- * Find in @cache and return @xattr_name attribute in @xattr,
- * for the NULL @xattr_name return the first cached @xattr.
- *
- * \retval 0 success
- * \retval -ENODATA if not found
- */
-static int ll_xattr_cache_find(struct list_head *cache,
- const char *xattr_name,
- struct ll_xattr_entry **xattr)
-{
- struct ll_xattr_entry *entry;
-
- list_for_each_entry(entry, cache, xe_list) {
- /* xattr_name == NULL means look for any entry */
- if (!xattr_name || strcmp(xattr_name, entry->xe_name) == 0) {
- *xattr = entry;
- CDEBUG(D_CACHE, "find: [%s]=%.*s\n",
- entry->xe_name, entry->xe_vallen,
- entry->xe_value);
- return 0;
- }
- }
-
- return -ENODATA;
-}
-
-/**
- * This adds an xattr.
- *
- * Add @xattr_name attr with @xattr_val value and @xattr_val_len length,
- *
- * \retval 0 success
- * \retval -ENOMEM if no memory could be allocated for the cached attr
- * \retval -EPROTO if duplicate xattr is being added
- */
-static int ll_xattr_cache_add(struct list_head *cache,
- const char *xattr_name,
- const char *xattr_val,
- unsigned int xattr_val_len)
-{
- struct ll_xattr_entry *xattr;
-
- if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
- CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name);
- return -EPROTO;
- }
-
- xattr = kmem_cache_zalloc(xattr_kmem, GFP_NOFS);
- if (!xattr) {
- CDEBUG(D_CACHE, "failed to allocate xattr\n");
- return -ENOMEM;
- }
-
- xattr->xe_name = kstrdup(xattr_name, GFP_NOFS);
- if (!xattr->xe_name) {
- CDEBUG(D_CACHE, "failed to alloc xattr name %u\n",
- xattr->xe_namelen);
- goto err_name;
- }
- xattr->xe_value = kmemdup(xattr_val, xattr_val_len, GFP_NOFS);
- if (!xattr->xe_value)
- goto err_value;
-
- xattr->xe_vallen = xattr_val_len;
- list_add(&xattr->xe_list, cache);
-
- CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, xattr_val_len,
- xattr_val);
-
- return 0;
-err_value:
- kfree(xattr->xe_name);
-err_name:
- kmem_cache_free(xattr_kmem, xattr);
-
- return -ENOMEM;
-}
-
-/**
- * This removes an extended attribute from cache.
- *
- * Remove @xattr_name attribute from @cache.
- *
- * \retval 0 success
- * \retval -ENODATA if @xattr_name is not cached
- */
-static int ll_xattr_cache_del(struct list_head *cache,
- const char *xattr_name)
-{
- struct ll_xattr_entry *xattr;
-
- CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name);
-
- if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) {
- list_del(&xattr->xe_list);
- kfree(xattr->xe_name);
- kfree(xattr->xe_value);
- kmem_cache_free(xattr_kmem, xattr);
-
- return 0;
- }
-
- return -ENODATA;
-}
-
-/**
- * This iterates cached extended attributes.
- *
- * Walk over cached attributes in @cache and
- * fill in @xld_buffer or only calculate buffer
- * size if @xld_buffer is NULL.
- *
- * \retval >= 0 buffer list size
- * \retval -ENODATA if the list cannot fit @xld_size buffer
- */
-static int ll_xattr_cache_list(struct list_head *cache,
- char *xld_buffer,
- int xld_size)
-{
- struct ll_xattr_entry *xattr, *tmp;
- int xld_tail = 0;
-
- list_for_each_entry_safe(xattr, tmp, cache, xe_list) {
- CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n",
- xld_buffer, xld_tail, xattr->xe_name);
-
- if (xld_buffer) {
- xld_size -= xattr->xe_namelen;
- if (xld_size < 0)
- break;
- memcpy(&xld_buffer[xld_tail],
- xattr->xe_name, xattr->xe_namelen);
- }
- xld_tail += xattr->xe_namelen;
- }
-
- if (xld_size < 0)
- return -ERANGE;
-
- return xld_tail;
-}
-
-/**
- * Check if the xattr cache is initialized (filled).
- *
- * \retval 0 @cache is not initialized
- * \retval 1 @cache is initialized
- */
-static int ll_xattr_cache_valid(struct ll_inode_info *lli)
-{
- return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-}
-
-/**
- * This finalizes the xattr cache.
- *
- * Free all xattr memory. @lli is the inode info pointer.
- *
- * \retval 0 no error occurred
- */
-static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli)
-{
- if (!ll_xattr_cache_valid(lli))
- return 0;
-
- while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0)
- ; /* empty loop */
-
- clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags);
-
- return 0;
-}
-
-int ll_xattr_cache_destroy(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
-
- down_write(&lli->lli_xattrs_list_rwsem);
- rc = ll_xattr_cache_destroy_locked(lli);
- up_write(&lli->lli_xattrs_list_rwsem);
-
- return rc;
-}
-
-/**
- * Match or enqueue a PR lock.
- *
- * Find or request an LDLM lock with xattr data.
- * Since LDLM does not provide API for atomic match_or_enqueue,
- * the function handles it with a separate enq lock.
- * If successful, the function exits with the list lock held.
- *
- * \retval 0 no error occurred
- * \retval -ENOMEM not enough memory
- */
-static int ll_xattr_find_get_lock(struct inode *inode,
- struct lookup_intent *oit,
- struct ptlrpc_request **req)
-{
- enum ldlm_mode mode;
- struct lustre_handle lockh = { 0 };
- struct md_op_data *op_data;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_enqueue_info einfo = {
- .ei_type = LDLM_IBITS,
- .ei_mode = it_to_lock_mode(oit),
- .ei_cb_bl = &ll_md_blocking_ast,
- .ei_cb_cp = &ldlm_completion_ast,
- };
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct obd_export *exp = sbi->ll_md_exp;
- int rc;
-
- mutex_lock(&lli->lli_xattrs_enq_lock);
- /* inode may have been shrunk and recreated, so data is gone, match lock
- * only when data exists.
- */
- if (ll_xattr_cache_valid(lli)) {
- /* Try matching first. */
- mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0,
- LCK_PR);
- if (mode != 0) {
- /* fake oit in mdc_revalidate_lock() manner */
- oit->it_lock_handle = lockh.cookie;
- oit->it_lock_mode = mode;
- goto out;
- }
- }
-
- /* Enqueue if the lock isn't cached locally. */
- op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data)) {
- mutex_unlock(&lli->lli_xattrs_enq_lock);
- return PTR_ERR(op_data);
- }
-
- op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;
-
- rc = md_enqueue(exp, &einfo, NULL, oit, op_data, &lockh, 0);
- ll_finish_md_op_data(op_data);
-
- if (rc < 0) {
- CDEBUG(D_CACHE,
- "md_intent_lock failed with %d for fid " DFID "\n",
- rc, PFID(ll_inode2fid(inode)));
- mutex_unlock(&lli->lli_xattrs_enq_lock);
- return rc;
- }
-
- *req = oit->it_request;
-out:
- down_write(&lli->lli_xattrs_list_rwsem);
- mutex_unlock(&lli->lli_xattrs_enq_lock);
-
- return 0;
-}
-
-/**
- * Refill the xattr cache.
- *
- * Fetch and cache the whole of xattrs for @inode, acquiring
- * a read or a write xattr lock depending on operation in @oit.
- * Intent is dropped on exit unless the operation is setxattr.
- *
- * \retval 0 no error occurred
- * \retval -EPROTO network protocol error
- * \retval -ENOMEM not enough memory for the cache
- */
-static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *req = NULL;
- const char *xdata, *xval, *xtail, *xvtail;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct mdt_body *body;
- __u32 *xsizes;
- int rc, i;
-
- rc = ll_xattr_find_get_lock(inode, oit, &req);
- if (rc)
- goto out_no_unlock;
-
- /* Do we have the data at this point? */
- if (ll_xattr_cache_valid(lli)) {
- ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1);
- rc = 0;
- goto out_maybe_drop;
- }
-
- /* Matched but no cache? Cancelled on error by a parallel refill. */
- if (unlikely(!req)) {
- CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n");
- rc = -EIO;
- goto out_maybe_drop;
- }
-
- if (oit->it_status < 0) {
- CDEBUG(D_CACHE,
- "getxattr intent returned %d for fid " DFID "\n",
- oit->it_status, PFID(ll_inode2fid(inode)));
- rc = oit->it_status;
- /* xattr data is so large that we don't want to cache it */
- if (rc == -ERANGE)
- rc = -EAGAIN;
- goto out_destroy;
- }
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!body) {
- CERROR("no MDT BODY in the refill xattr reply\n");
- rc = -EPROTO;
- goto out_destroy;
- }
- /* do not need swab xattr data */
- xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
- body->mbo_eadatasize);
- xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
- body->mbo_aclsize);
- xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
- body->mbo_max_mdsize * sizeof(__u32));
- if (!xdata || !xval || !xsizes) {
- CERROR("wrong setxattr reply\n");
- rc = -EPROTO;
- goto out_destroy;
- }
-
- xtail = xdata + body->mbo_eadatasize;
- xvtail = xval + body->mbo_aclsize;
-
- CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);
-
- ll_xattr_cache_init(lli);
-
- for (i = 0; i < body->mbo_max_mdsize; i++) {
- CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
- /* Perform consistency checks: attr names and vals in pill */
- if (!memchr(xdata, 0, xtail - xdata)) {
- CERROR("xattr protocol violation (names are broken)\n");
- rc = -EPROTO;
- } else if (xval + *xsizes > xvtail) {
- CERROR("xattr protocol violation (vals are broken)\n");
- rc = -EPROTO;
- } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) {
- rc = -ENOMEM;
- } else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) {
- /* Filter out ACL ACCESS since it's cached separately */
- CDEBUG(D_CACHE, "not caching %s\n",
- XATTR_NAME_ACL_ACCESS);
- rc = 0;
- } else if (!strcmp(xdata, "security.selinux")) {
- /* Filter out security.selinux, it is cached in slab */
- CDEBUG(D_CACHE, "not caching security.selinux\n");
- rc = 0;
- } else {
- rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval,
- *xsizes);
- }
- if (rc < 0) {
- ll_xattr_cache_destroy_locked(lli);
- goto out_destroy;
- }
- xdata += strlen(xdata) + 1;
- xval += *xsizes;
- xsizes++;
- }
-
- if (xdata != xtail || xval != xvtail)
- CERROR("a hole in xattr data\n");
-
- ll_set_lock_data(sbi->ll_md_exp, inode, oit, NULL);
-
- goto out_maybe_drop;
-out_maybe_drop:
-
- ll_intent_drop_lock(oit);
-
- if (rc != 0)
- up_write(&lli->lli_xattrs_list_rwsem);
-out_no_unlock:
- ptlrpc_req_finished(req);
-
- return rc;
-
-out_destroy:
- up_write(&lli->lli_xattrs_list_rwsem);
-
- ldlm_lock_decref_and_cancel((struct lustre_handle *)
- &oit->it_lock_handle,
- oit->it_lock_mode);
-
- goto out_no_unlock;
-}
-
-/**
- * Get an xattr value or list xattrs using the write-through cache.
- *
- * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or
- * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode.
- * The resulting value/list is stored in @buffer if the former
- * is not larger than @size.
- *
- * \retval 0 no error occurred
- * \retval -EPROTO network protocol error
- * \retval -ENOMEM not enough memory for the cache
- * \retval -ERANGE the buffer is not large enough
- * \retval -ENODATA no such attr or the list is empty
- */
-int ll_xattr_cache_get(struct inode *inode, const char *name, char *buffer,
- size_t size, __u64 valid)
-{
- struct lookup_intent oit = { .it_op = IT_GETXATTR };
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc = 0;
-
- LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS));
-
- down_read(&lli->lli_xattrs_list_rwsem);
- if (!ll_xattr_cache_valid(lli)) {
- up_read(&lli->lli_xattrs_list_rwsem);
- rc = ll_xattr_cache_refill(inode, &oit);
- if (rc)
- return rc;
- downgrade_write(&lli->lli_xattrs_list_rwsem);
- } else {
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1);
- }
-
- if (valid & OBD_MD_FLXATTR) {
- struct ll_xattr_entry *xattr;
-
- rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr);
- if (rc == 0) {
- rc = xattr->xe_vallen;
- /* zero size means we are only requested size in rc */
- if (size != 0) {
- if (size >= xattr->xe_vallen)
- memcpy(buffer, xattr->xe_value,
- xattr->xe_vallen);
- else
- rc = -ERANGE;
- }
- }
- } else if (valid & OBD_MD_FLXATTRLS) {
- rc = ll_xattr_cache_list(&lli->lli_xattrs,
- size ? buffer : NULL, size);
- }
-
- goto out;
-out:
- up_read(&lli->lli_xattrs_list_rwsem);
-
- return rc;
-}
diff --git a/drivers/staging/lustre/lustre/llite/xattr_security.c b/drivers/staging/lustre/lustre/llite/xattr_security.c
deleted file mode 100644
index 93ec07531ac7..000000000000
--- a/drivers/staging/lustre/lustre/llite/xattr_security.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright (c) 2014 Bull SAS
- * Author: Sebastien Buisson sebastien.buisson@bull.net
- */
-
-/*
- * lustre/llite/xattr_security.c
- * Handler for storing security labels as extended attributes.
- */
-
-#include <linux/types.h>
-#include <linux/security.h>
-#include <linux/selinux.h>
-#include <linux/xattr.h>
-#include "llite_internal.h"
-
-/**
- * A helper function for ll_security_inode_init_security()
- * that takes care of setting xattrs
- *
- * Get security context of @inode from @xattr_array,
- * and put it in 'security.xxx' xattr of dentry
- * stored in @fs_info.
- *
- * \retval 0 success
- * \retval -ENOMEM if no memory could be allocated for xattr name
- * \retval < 0 failure to set xattr
- */
-static int
-ll_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
-{
- struct dentry *dentry = fs_info;
- const struct xattr *xattr;
- int err = 0;
-
- for (xattr = xattr_array; xattr->name; xattr++) {
- char *full_name;
-
- full_name = kasprintf(GFP_KERNEL, "%s%s",
- XATTR_SECURITY_PREFIX, xattr->name);
- if (!full_name) {
- err = -ENOMEM;
- break;
- }
-
- err = __vfs_setxattr(dentry, inode, full_name, xattr->value,
- xattr->value_len, XATTR_CREATE);
- kfree(full_name);
- if (err < 0)
- break;
- }
- return err;
-}
-
-/**
- * Initializes security context
- *
- * Get security context of @inode in @dir,
- * and put it in 'security.xxx' xattr of @dentry.
- *
- * \retval 0 success, or SELinux is disabled
- * \retval -ENOMEM if no memory could be allocated for xattr name
- * \retval < 0 failure to get security context or set xattr
- */
-int
-ll_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir)
-{
- if (!selinux_is_enabled())
- return 0;
-
- return security_inode_init_security(inode, dir, NULL,
- &ll_initxattrs, dentry);
-}