Diffstat (limited to 'drivers/staging/lustre/lustre/ldlm')
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/interval_tree.c | 599
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/l_lock.c | 74
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_extent.c | 259
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_flock.c | 495
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c | 69
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_internal.h | 342
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_lib.c | 843
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_lock.c | 2146
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c | 1163
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_plain.c | 68
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_pool.c | 1023
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_request.c | 2080
-rw-r--r-- | drivers/staging/lustre/lustre/ldlm/ldlm_resource.c | 1369
13 files changed, 0 insertions(+), 10530 deletions(-)
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c deleted file mode 100644 index 8df7a4463c21..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c +++ /dev/null @@ -1,599 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/interval_tree.c - * - * Interval tree library used by ldlm extent lock code - * - * Author: Huang Wei <huangwei@clusterfs.com> - * Author: Jay Xiong <jinshan.xiong@sun.com> - */ -#include <lustre_dlm.h> -#include <obd_support.h> -#include <interval_tree.h> - -enum { - INTERVAL_RED = 0, - INTERVAL_BLACK = 1 -}; - -static inline int node_is_left_child(struct interval_node *node) -{ - return node == node->in_parent->in_left; -} - -static inline int node_is_right_child(struct interval_node *node) -{ - return node == node->in_parent->in_right; -} - -static inline int node_is_red(struct interval_node *node) -{ - return node->in_color == INTERVAL_RED; -} - -static inline int node_is_black(struct interval_node *node) -{ - return node->in_color == INTERVAL_BLACK; -} - -static inline int extent_compare(struct interval_node_extent *e1, - struct interval_node_extent *e2) -{ - int rc; - - if (e1->start == e2->start) { - if (e1->end < e2->end) - rc = -1; - else if (e1->end > e2->end) - rc = 1; - else - rc = 0; - } else { - if (e1->start < e2->start) - rc = -1; - else - rc = 1; - } - return rc; -} - -static inline int extent_equal(struct interval_node_extent *e1, - struct interval_node_extent *e2) -{ - return (e1->start == e2->start) && (e1->end == e2->end); -} - -static inline int extent_overlapped(struct interval_node_extent *e1, - struct interval_node_extent *e2) -{ - return (e1->start <= e2->end) && (e2->start <= e1->end); -} - -static inline int node_equal(struct interval_node *n1, struct interval_node *n2) -{ - return extent_equal(&n1->in_extent, &n2->in_extent); -} - -static struct interval_node *interval_first(struct interval_node *node) -{ - if (!node) - return NULL; - while (node->in_left) - node = node->in_left; - return node; -} - -static struct interval_node *interval_last(struct interval_node *node) -{ - if (!node) - return NULL; - while (node->in_right) - node = node->in_right; - return node; -} - -static struct interval_node *interval_next(struct interval_node *node) -{ - if (!node) - return NULL; - if (node->in_right) - return interval_first(node->in_right); - while (node->in_parent && node_is_right_child(node)) - node = 
node->in_parent; - return node->in_parent; -} - -static struct interval_node *interval_prev(struct interval_node *node) -{ - if (!node) - return NULL; - - if (node->in_left) - return interval_last(node->in_left); - - while (node->in_parent && node_is_left_child(node)) - node = node->in_parent; - - return node->in_parent; -} - -enum interval_iter interval_iterate_reverse(struct interval_node *root, - interval_callback_t func, - void *data) -{ - enum interval_iter rc = INTERVAL_ITER_CONT; - struct interval_node *node; - - for (node = interval_last(root); node; node = interval_prev(node)) { - rc = func(node, data); - if (rc == INTERVAL_ITER_STOP) - break; - } - - return rc; -} -EXPORT_SYMBOL(interval_iterate_reverse); - -static void __rotate_change_maxhigh(struct interval_node *node, - struct interval_node *rotate) -{ - __u64 left_max, right_max; - - rotate->in_max_high = node->in_max_high; - left_max = node->in_left ? node->in_left->in_max_high : 0; - right_max = node->in_right ? node->in_right->in_max_high : 0; - node->in_max_high = max(interval_high(node), - max(left_max, right_max)); -} - -/* The left rotation "pivots" around the link from node to node->right, and - * - node will be linked to node->right's left child, and - * - node->right's left child will be linked to node's right child. - */ -static void __rotate_left(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *right = node->in_right; - struct interval_node *parent = node->in_parent; - - node->in_right = right->in_left; - if (node->in_right) - right->in_left->in_parent = node; - - right->in_left = node; - right->in_parent = parent; - if (parent) { - if (node_is_left_child(node)) - parent->in_left = right; - else - parent->in_right = right; - } else { - *root = right; - } - node->in_parent = right; - - /* update max_high for node and right */ - __rotate_change_maxhigh(node, right); -} - -/* The right rotation "pivots" around the link from node to node->left, and - * - node will be linked to node->left's right child, and - * - node->left's right child will be linked to node's left child. - */ -static void __rotate_right(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *left = node->in_left; - struct interval_node *parent = node->in_parent; - - node->in_left = left->in_right; - if (node->in_left) - left->in_right->in_parent = node; - left->in_right = node; - - left->in_parent = parent; - if (parent) { - if (node_is_right_child(node)) - parent->in_right = left; - else - parent->in_left = left; - } else { - *root = left; - } - node->in_parent = left; - - /* update max_high for node and left */ - __rotate_change_maxhigh(node, left); -} - -#define interval_swap(a, b) do { \ - struct interval_node *c = a; a = b; b = c; \ -} while (0) - -/* - * Operations INSERT and DELETE, when run on a tree with n keys, - * take O(logN) time.Because they modify the tree, the result - * may violate the red-black properties.To restore these properties, - * we must change the colors of some of the nodes in the tree - * and also change the pointer structure. 
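
[Editor's note] The extent ordering and overlap rules defined earlier in this file are easy to model outside the kernel. Below is a minimal standalone sketch (userspace C; the struct and function names are mine, not Lustre's) of extent_compare() and extent_overlapped(): extents order by start, then by end, and two inclusive [start, end] ranges overlap exactly when each begins at or before the other ends.

#include <stdio.h>
#include <stdint.h>

struct ext { uint64_t start, end; }; /* inclusive [start, end] */

/* Order by start, then by end, mirroring extent_compare() above. */
static int ext_cmp(const struct ext *a, const struct ext *b)
{
	if (a->start != b->start)
		return a->start < b->start ? -1 : 1;
	if (a->end != b->end)
		return a->end < b->end ? -1 : 1;
	return 0;
}

/* Inclusive ranges overlap iff each begins at or before the other ends. */
static int ext_overlap(const struct ext *a, const struct ext *b)
{
	return a->start <= b->end && b->start <= a->end;
}

int main(void)
{
	struct ext a = { 0, 4095 }, b = { 4096, 8191 }, c = { 4000, 5000 };

	printf("cmp(a,b)=%d overlap(a,b)=%d overlap(a,c)=%d overlap(b,c)=%d\n",
	       ext_cmp(&a, &b), ext_overlap(&a, &b),
	       ext_overlap(&a, &c), ext_overlap(&b, &c));
	/* prints: cmp(a,b)=-1 overlap(a,b)=0 overlap(a,c)=1 overlap(b,c)=1 */
	return 0;
}

Note that adjacent extents such as a and b above do not overlap; adjacency only matters later, in the flock merge logic.
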
- */ -static void interval_insert_color(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *parent, *gparent; - - while ((parent = node->in_parent) && node_is_red(parent)) { - gparent = parent->in_parent; - /* Parent is RED, so gparent must not be NULL */ - if (node_is_left_child(parent)) { - struct interval_node *uncle; - - uncle = gparent->in_right; - if (uncle && node_is_red(uncle)) { - uncle->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - node = gparent; - continue; - } - - if (parent->in_right == node) { - __rotate_left(parent, root); - interval_swap(node, parent); - } - - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - __rotate_right(gparent, root); - } else { - struct interval_node *uncle; - - uncle = gparent->in_left; - if (uncle && node_is_red(uncle)) { - uncle->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - node = gparent; - continue; - } - - if (node_is_left_child(node)) { - __rotate_right(parent, root); - interval_swap(node, parent); - } - - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - __rotate_left(gparent, root); - } - } - - (*root)->in_color = INTERVAL_BLACK; -} - -struct interval_node *interval_insert(struct interval_node *node, - struct interval_node **root) - -{ - struct interval_node **p, *parent = NULL; - - LASSERT(!interval_is_intree(node)); - p = root; - while (*p) { - parent = *p; - if (node_equal(parent, node)) - return parent; - - /* max_high field must be updated after each iteration */ - if (parent->in_max_high < interval_high(node)) - parent->in_max_high = interval_high(node); - - if (extent_compare(&node->in_extent, &parent->in_extent) < 0) - p = &parent->in_left; - else - p = &parent->in_right; - } - - /* link node into the tree */ - node->in_parent = parent; - node->in_color = INTERVAL_RED; - node->in_left = NULL; - node->in_right = NULL; - *p = node; - - interval_insert_color(node, root); - node->in_intree = 1; - - return NULL; -} -EXPORT_SYMBOL(interval_insert); - -static inline int node_is_black_or_0(struct interval_node *node) -{ - return !node || node_is_black(node); -} - -static void interval_erase_color(struct interval_node *node, - struct interval_node *parent, - struct interval_node **root) -{ - struct interval_node *tmp; - - while (node_is_black_or_0(node) && node != *root) { - if (parent->in_left == node) { - tmp = parent->in_right; - if (node_is_red(tmp)) { - tmp->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_RED; - __rotate_left(parent, root); - tmp = parent->in_right; - } - if (node_is_black_or_0(tmp->in_left) && - node_is_black_or_0(tmp->in_right)) { - tmp->in_color = INTERVAL_RED; - node = parent; - parent = node->in_parent; - } else { - if (node_is_black_or_0(tmp->in_right)) { - struct interval_node *o_left; - - o_left = tmp->in_left; - if (o_left) - o_left->in_color = INTERVAL_BLACK; - tmp->in_color = INTERVAL_RED; - __rotate_right(tmp, root); - tmp = parent->in_right; - } - tmp->in_color = parent->in_color; - parent->in_color = INTERVAL_BLACK; - if (tmp->in_right) - tmp->in_right->in_color = INTERVAL_BLACK; - __rotate_left(parent, root); - node = *root; - break; - } - } else { - tmp = parent->in_left; - if (node_is_red(tmp)) { - tmp->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_RED; - __rotate_right(parent, root); - tmp = parent->in_left; - } - if (node_is_black_or_0(tmp->in_left) && - node_is_black_or_0(tmp->in_right)) { - 
tmp->in_color = INTERVAL_RED; - node = parent; - parent = node->in_parent; - } else { - if (node_is_black_or_0(tmp->in_left)) { - struct interval_node *o_right; - - o_right = tmp->in_right; - if (o_right) - o_right->in_color = INTERVAL_BLACK; - tmp->in_color = INTERVAL_RED; - __rotate_left(tmp, root); - tmp = parent->in_left; - } - tmp->in_color = parent->in_color; - parent->in_color = INTERVAL_BLACK; - if (tmp->in_left) - tmp->in_left->in_color = INTERVAL_BLACK; - __rotate_right(parent, root); - node = *root; - break; - } - } - } - if (node) - node->in_color = INTERVAL_BLACK; -} - -/* - * if the @max_high value of @node is changed, this function traverse a path - * from node up to the root to update max_high for the whole tree. - */ -static void update_maxhigh(struct interval_node *node, - __u64 old_maxhigh) -{ - __u64 left_max, right_max; - - while (node) { - left_max = node->in_left ? node->in_left->in_max_high : 0; - right_max = node->in_right ? node->in_right->in_max_high : 0; - node->in_max_high = max(interval_high(node), - max(left_max, right_max)); - - if (node->in_max_high >= old_maxhigh) - break; - node = node->in_parent; - } -} - -void interval_erase(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *child, *parent; - int color; - - LASSERT(interval_is_intree(node)); - node->in_intree = 0; - if (!node->in_left) { - child = node->in_right; - } else if (!node->in_right) { - child = node->in_left; - } else { /* Both left and right child are not NULL */ - struct interval_node *old = node; - - node = interval_next(node); - child = node->in_right; - parent = node->in_parent; - color = node->in_color; - - if (child) - child->in_parent = parent; - if (parent == old) - parent->in_right = child; - else - parent->in_left = child; - - node->in_color = old->in_color; - node->in_right = old->in_right; - node->in_left = old->in_left; - node->in_parent = old->in_parent; - - if (old->in_parent) { - if (node_is_left_child(old)) - old->in_parent->in_left = node; - else - old->in_parent->in_right = node; - } else { - *root = node; - } - - old->in_left->in_parent = node; - if (old->in_right) - old->in_right->in_parent = node; - update_maxhigh(child ? : parent, node->in_max_high); - update_maxhigh(node, old->in_max_high); - if (parent == old) - parent = node; - goto color; - } - parent = node->in_parent; - color = node->in_color; - - if (child) - child->in_parent = parent; - if (parent) { - if (node_is_left_child(node)) - parent->in_left = child; - else - parent->in_right = child; - } else { - *root = child; - } - - update_maxhigh(child ? : parent, node->in_max_high); - -color: - if (color == INTERVAL_BLACK) - interval_erase_color(child, parent, root); -} -EXPORT_SYMBOL(interval_erase); - -static inline int interval_may_overlap(struct interval_node *node, - struct interval_node_extent *ext) -{ - return (ext->start <= node->in_max_high && - ext->end >= interval_low(node)); -} - -/* - * This function finds all intervals that overlap interval ext, - * and calls func to handle resulted intervals one by one. - * in lustre, this function will find all conflicting locks in - * the granted queue and add these locks to the ast work list. 
- * - * { - * if (!node) - * return 0; - * if (ext->end < interval_low(node)) { - * interval_search(node->in_left, ext, func, data); - * } else if (interval_may_overlap(node, ext)) { - * if (extent_overlapped(ext, &node->in_extent)) - * func(node, data); - * interval_search(node->in_left, ext, func, data); - * interval_search(node->in_right, ext, func, data); - * } - * return 0; - * } - * - */ -enum interval_iter interval_search(struct interval_node *node, - struct interval_node_extent *ext, - interval_callback_t func, - void *data) -{ - enum interval_iter rc = INTERVAL_ITER_CONT; - struct interval_node *parent; - - LASSERT(ext); - LASSERT(func); - - while (node) { - if (ext->end < interval_low(node)) { - if (node->in_left) { - node = node->in_left; - continue; - } - } else if (interval_may_overlap(node, ext)) { - if (extent_overlapped(ext, &node->in_extent)) { - rc = func(node, data); - if (rc == INTERVAL_ITER_STOP) - break; - } - - if (node->in_left) { - node = node->in_left; - continue; - } - if (node->in_right) { - node = node->in_right; - continue; - } - } - - parent = node->in_parent; - while (parent) { - if (node_is_left_child(node) && - parent->in_right) { - /* - * If we ever got the left, it means that the - * parent met ext->end<interval_low(parent), or - * may_overlap(parent). If the former is true, - * we needn't go back. So stop early and check - * may_overlap(parent) after this loop. - */ - node = parent->in_right; - break; - } - node = parent; - parent = parent->in_parent; - } - if (!parent || !interval_may_overlap(parent, ext)) - break; - } - - return rc; -} -EXPORT_SYMBOL(interval_search); diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c deleted file mode 100644 index 0662cec14b81..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/l_lock.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#include <linux/libcfs/libcfs.h> - -#include <lustre_dlm.h> -#include <lustre_lib.h> - -/** - * Lock a lock and its resource. - * - * LDLM locking uses resource to serialize access to locks - * but there is a case when we change resource of lock upon - * enqueue reply. We rely on lock->l_resource = new_res - * being an atomic operation. 
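
[Editor's note] To make the iteration contract of interval_search() above concrete, here is a hedged userspace sketch of how a caller drives it: the callback returns INTERVAL_ITER_CONT to keep visiting overlapping nodes and INTERVAL_ITER_STOP to abort early. The enum values mirror the kernel code's names; the flat array standing in for the tree, and all other names, are mine.

#include <stdio.h>
#include <stdint.h>

enum interval_iter { INTERVAL_ITER_CONT = 1, INTERVAL_ITER_STOP = 2 };

struct ival { uint64_t lo, hi; };

typedef enum interval_iter (*ival_cb_t)(const struct ival *, void *);

/* Stand-in for interval_search(): visit every stored interval that
 * overlaps ext, honouring the callback's CONT/STOP verdict. */
static enum interval_iter ival_search(const struct ival *set, int n,
				      const struct ival *ext,
				      ival_cb_t func, void *data)
{
	enum interval_iter rc = INTERVAL_ITER_CONT;
	int i;

	for (i = 0; i < n; i++) {
		if (set[i].lo <= ext->hi && ext->lo <= set[i].hi) {
			rc = func(&set[i], data);
			if (rc == INTERVAL_ITER_STOP)
				break;
		}
	}
	return rc;
}

static enum interval_iter count_cb(const struct ival *node, void *data)
{
	int *count = data;

	/* Stop after two conflicts, e.g. "enough AST work queued". */
	return ++(*count) < 2 ? INTERVAL_ITER_CONT : INTERVAL_ITER_STOP;
}

int main(void)
{
	struct ival granted[] = { { 0, 99 }, { 50, 150 }, { 200, 300 } };
	struct ival ext = { 60, 250 };
	int count = 0;

	ival_search(granted, 3, &ext, count_cb, &count);
	printf("overlapping locks visited: %d\n", count); /* 2 */
	return 0;
}
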
- */ -struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock) - __acquires(&lock->l_lock) - __acquires(&lock->l_resource->lr_lock) -{ - spin_lock(&lock->l_lock); - - lock_res(lock->l_resource); - - ldlm_set_res_locked(lock); - return lock->l_resource; -} -EXPORT_SYMBOL(lock_res_and_lock); - -/** - * Unlock a lock and its resource previously locked with lock_res_and_lock - */ -void unlock_res_and_lock(struct ldlm_lock *lock) - __releases(&lock->l_resource->lr_lock) - __releases(&lock->l_lock) -{ - /* on server-side resource of lock doesn't change */ - ldlm_clear_res_locked(lock); - - unlock_res(lock->l_resource); - spin_unlock(&lock->l_lock); -} -EXPORT_SYMBOL(unlock_res_and_lock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c deleted file mode 100644 index 11b11b5f3216..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c +++ /dev/null @@ -1,259 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_extent.c - * - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - */ - -/** - * This file contains implementation of EXTENT lock type - * - * EXTENT lock type is for locking a contiguous range of values, represented - * by 64-bit starting and ending offsets (inclusive). There are several extent - * lock modes, some of which may be mutually incompatible. Extent locks are - * considered incompatible if their modes are incompatible and their extents - * intersect. See the lock mode compatibility matrix in lustre_dlm.h. - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#include <linux/libcfs/libcfs.h> -#include <lustre_dlm.h> -#include <obd_support.h> -#include <obd.h> -#include <obd_class.h> -#include <lustre_lib.h> -#include "ldlm_internal.h" - -/* When a lock is cancelled by a client, the KMS may undergo change if this - * is the "highest lock". This function returns the new KMS value. - * Caller must hold lr_lock already. - * - * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes! 
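
[Editor's note] As a worked illustration of the KMS rule documented above (purely illustrative userspace C; the real ldlm_extent_shift_kms() walks lr_granted and honours the kms_ignore flag): dropping the lock that backed the old known-minimal-size shrinks it to max(end + 1) over the remaining extents, and the result can never exceed old_kms.

#include <stdio.h>
#include <stdint.h>

/* Recompute KMS after one lock is cancelled by scanning the extents
 * that remain granted. An extent ending at or past old_kms means
 * nothing changes; otherwise KMS becomes the largest end + 1 seen,
 * since a lock on [x, y] covers bytes up to y + 1. */
static uint64_t shift_kms(const uint64_t *ends, int n, uint64_t old_kms)
{
	uint64_t kms = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (ends[i] >= old_kms)
			return old_kms;
		if (ends[i] + 1 > kms)
			kms = ends[i] + 1;
	}
	return kms;
}

int main(void)
{
	/* The remaining granted extents end at these offsets. */
	uint64_t ends[] = { 4095, 16383 };

	/* The cancelled lock ended at 65535, so the old KMS was 65536. */
	printf("new kms = %llu\n",
	       (unsigned long long)shift_kms(ends, 2, 65536)); /* 16384 */
	return 0;
}
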
- */ -__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms) -{ - struct ldlm_resource *res = lock->l_resource; - struct ldlm_lock *lck; - __u64 kms = 0; - - /* don't let another thread in ldlm_extent_shift_kms race in - * just after we finish and take our lock into account in its - * calculation of the kms - */ - ldlm_set_kms_ignore(lock); - - list_for_each_entry(lck, &res->lr_granted, l_res_link) { - - if (ldlm_is_kms_ignore(lck)) - continue; - - if (lck->l_policy_data.l_extent.end >= old_kms) - return old_kms; - - /* This extent _has_ to be smaller than old_kms (checked above) - * so kms can only ever be smaller or the same as old_kms. - */ - if (lck->l_policy_data.l_extent.end + 1 > kms) - kms = lck->l_policy_data.l_extent.end + 1; - } - LASSERTF(kms <= old_kms, "kms %llu old_kms %llu\n", kms, old_kms); - - return kms; -} -EXPORT_SYMBOL(ldlm_extent_shift_kms); - -struct kmem_cache *ldlm_interval_slab; - -/* interval tree, for LDLM_EXTENT. */ -static void ldlm_interval_attach(struct ldlm_interval *n, struct ldlm_lock *l) -{ - LASSERT(!l->l_tree_node); - LASSERT(l->l_resource->lr_type == LDLM_EXTENT); - - list_add_tail(&l->l_sl_policy, &n->li_group); - l->l_tree_node = n; -} - -struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock) -{ - struct ldlm_interval *node; - - LASSERT(lock->l_resource->lr_type == LDLM_EXTENT); - node = kmem_cache_zalloc(ldlm_interval_slab, GFP_NOFS); - if (!node) - return NULL; - - INIT_LIST_HEAD(&node->li_group); - ldlm_interval_attach(node, lock); - return node; -} - -void ldlm_interval_free(struct ldlm_interval *node) -{ - if (node) { - LASSERT(list_empty(&node->li_group)); - LASSERT(!interval_is_intree(&node->li_node)); - kmem_cache_free(ldlm_interval_slab, node); - } -} - -struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l) -{ - struct ldlm_interval *n = l->l_tree_node; - - if (!n) - return NULL; - - LASSERT(!list_empty(&n->li_group)); - l->l_tree_node = NULL; - list_del_init(&l->l_sl_policy); - - return list_empty(&n->li_group) ? n : NULL; -} - -static inline int lock_mode_to_index(enum ldlm_mode mode) -{ - int index; - - LASSERT(mode != 0); - LASSERT(is_power_of_2(mode)); - for (index = -1; mode; index++) - mode >>= 1; - LASSERT(index < LCK_MODE_NUM); - return index; -} - -/** Add newly granted lock into interval tree for the resource. */ -void ldlm_extent_add_lock(struct ldlm_resource *res, - struct ldlm_lock *lock) -{ - struct interval_node *found, **root; - struct ldlm_interval *node; - struct ldlm_extent *extent; - int idx, rc; - - LASSERT(lock->l_granted_mode == lock->l_req_mode); - - node = lock->l_tree_node; - LASSERT(node); - LASSERT(!interval_is_intree(&node->li_node)); - - idx = lock_mode_to_index(lock->l_granted_mode); - LASSERT(lock->l_granted_mode == 1 << idx); - LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode); - - /* node extent initialize */ - extent = &lock->l_policy_data.l_extent; - rc = interval_set(&node->li_node, extent->start, extent->end); - LASSERT(!rc); - - root = &res->lr_itree[idx].lit_root; - found = interval_insert(&node->li_node, root); - if (found) { /* The policy group found. */ - struct ldlm_interval *tmp; - - tmp = ldlm_interval_detach(lock); - ldlm_interval_free(tmp); - ldlm_interval_attach(to_ldlm_interval(found), lock); - } - res->lr_itree[idx].lit_size++; - - /* even though we use interval tree to manage the extent lock, we also - * add the locks into grant list, for debug purpose, .. 
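
[Editor's note] The mode-to-index mapping used above is just a log2 of a power-of-two mode bit, computed by shifting the bit out. A standalone rendering (the mode values below are demo inputs, not the real LCK_* constants):

#include <stdio.h>
#include <assert.h>

/* Map a power-of-two lock mode bit to its interval-tree index, as in
 * lock_mode_to_index() above: shift right until the bit falls out. */
static int mode_to_index(unsigned int mode)
{
	int index;

	assert(mode && !(mode & (mode - 1))); /* exactly one bit set */
	for (index = -1; mode; index++)
		mode >>= 1;
	return index;
}

int main(void)
{
	printf("%d %d %d\n", mode_to_index(1), mode_to_index(2),
	       mode_to_index(8)); /* 0 1 3 */
	return 0;
}
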
- */ - ldlm_resource_add_lock(res, &res->lr_granted, lock); - - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GRANT_CHECK)) { - struct ldlm_lock *lck; - - list_for_each_entry_reverse(lck, &res->lr_granted, - l_res_link) { - if (lck == lock) - continue; - if (lockmode_compat(lck->l_granted_mode, - lock->l_granted_mode)) - continue; - if (ldlm_extent_overlap(&lck->l_req_extent, - &lock->l_req_extent)) { - CDEBUG(D_ERROR, - "granting conflicting lock %p %p\n", - lck, lock); - ldlm_resource_dump(D_ERROR, res); - LBUG(); - } - } - } -} - -/** Remove cancelled lock from resource interval tree. */ -void ldlm_extent_unlink_lock(struct ldlm_lock *lock) -{ - struct ldlm_resource *res = lock->l_resource; - struct ldlm_interval *node = lock->l_tree_node; - struct ldlm_interval_tree *tree; - int idx; - - if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */ - return; - - idx = lock_mode_to_index(lock->l_granted_mode); - LASSERT(lock->l_granted_mode == 1 << idx); - tree = &res->lr_itree[idx]; - - LASSERT(tree->lit_root); /* assure the tree is not null */ - - tree->lit_size--; - node = ldlm_interval_detach(lock); - if (node) { - interval_erase(&node->li_node, &tree->lit_root); - ldlm_interval_free(node); - } -} - -void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - lpolicy->l_extent.start = wpolicy->l_extent.start; - lpolicy->l_extent.end = wpolicy->l_extent.end; - lpolicy->l_extent.gid = wpolicy->l_extent.gid; -} - -void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - memset(wpolicy, 0, sizeof(*wpolicy)); - wpolicy->l_extent.start = lpolicy->l_extent.start; - wpolicy->l_extent.end = lpolicy->l_extent.end; - wpolicy->l_extent.gid = lpolicy->l_extent.gid; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c deleted file mode 100644 index 411b540b96d9..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c +++ /dev/null @@ -1,495 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003 Hewlett-Packard Development Company LP. - * Developed under the sponsorship of the US Government under - * Subcontract No. B514193 - * - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** - * This file implements POSIX lock type for Lustre. - * Its policy properties are start and end of extent and PID. 
- * - * These locks are only done through MDS due to POSIX semantics requiring - * e.g. that locks could be only partially released and as such split into - * two parts, and also that two adjacent locks from the same process may be - * merged into a single wider lock. - * - * Lock modes are mapped like this: - * PR and PW for READ and WRITE locks - * NL to request a releasing of a portion of the lock - * - * These flock locks never timeout. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <lustre_dlm.h> -#include <obd_support.h> -#include <obd_class.h> -#include <lustre_lib.h> -#include <linux/list.h> -#include "ldlm_internal.h" - -static inline int -ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new) -{ - return((new->l_policy_data.l_flock.owner == - lock->l_policy_data.l_flock.owner) && - (new->l_export == lock->l_export)); -} - -static inline int -ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new) -{ - return((new->l_policy_data.l_flock.start <= - lock->l_policy_data.l_flock.end) && - (new->l_policy_data.l_flock.end >= - lock->l_policy_data.l_flock.start)); -} - -static inline void -ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode) -{ - LDLM_DEBUG(lock, "%s(mode: %d)", - __func__, mode); - - /* Safe to not lock here, since it should be empty anyway */ - LASSERT(hlist_unhashed(&lock->l_exp_flock_hash)); - - list_del_init(&lock->l_res_link); - - /* client side - set a flag to prevent sending a CANCEL */ - lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING; - - /* when reaching here, it is under lock_res_and_lock(). Thus, - * need call the nolock version of ldlm_lock_decref_internal - */ - ldlm_lock_decref_internal_nolock(lock, mode); - - ldlm_lock_destroy_nolock(lock); -} - -/** - * Process a granting attempt for flock lock. - * Must be called under ns lock held. - * - * This function looks for any conflicts for \a lock in the granted or - * waiting queues. The lock is granted if no conflicts are found in - * either queue. - * - * It is also responsible for splitting a lock if a portion of the lock - * is released. - * - */ -static int ldlm_process_flock_lock(struct ldlm_lock *req) -{ - struct ldlm_resource *res = req->l_resource; - struct ldlm_namespace *ns = ldlm_res_to_ns(res); - struct ldlm_lock *tmp; - struct ldlm_lock *lock; - struct ldlm_lock *new = req; - struct ldlm_lock *new2 = NULL; - enum ldlm_mode mode = req->l_req_mode; - int added = (mode == LCK_NL); - int splitted = 0; - const struct ldlm_callback_suite null_cbs = { }; - - CDEBUG(D_DLMTRACE, - "owner %llu pid %u mode %u start %llu end %llu\n", - new->l_policy_data.l_flock.owner, - new->l_policy_data.l_flock.pid, mode, - req->l_policy_data.l_flock.start, - req->l_policy_data.l_flock.end); - - /* No blocking ASTs are sent to the clients for - * Posix file & record locks - */ - req->l_blocking_ast = NULL; - -reprocess: - /* This loop determines where this processes locks start - * in the resource lr_granted list. - */ - list_for_each_entry(lock, &res->lr_granted, l_res_link) - if (ldlm_same_flock_owner(lock, req)) - break; - - /* Scan the locks owned by this process to find the insertion point - * (as locks are ordered), and to handle overlaps. - * We may have to merge or split existing locks. - */ - list_for_each_entry_safe_from(lock, tmp, &res->lr_granted, l_res_link) { - - if (!ldlm_same_flock_owner(lock, new)) - break; - - if (lock->l_granted_mode == mode) { - /* If the modes are the same then we need to process - * locks that overlap OR adjoin the new lock. 
The extra - * logic condition is necessary to deal with arithmetic - * overflow and underflow. - */ - if ((new->l_policy_data.l_flock.start > - (lock->l_policy_data.l_flock.end + 1)) && - (lock->l_policy_data.l_flock.end != OBD_OBJECT_EOF)) - continue; - - if ((new->l_policy_data.l_flock.end < - (lock->l_policy_data.l_flock.start - 1)) && - (lock->l_policy_data.l_flock.start != 0)) - break; - - if (new->l_policy_data.l_flock.start < - lock->l_policy_data.l_flock.start) { - lock->l_policy_data.l_flock.start = - new->l_policy_data.l_flock.start; - } else { - new->l_policy_data.l_flock.start = - lock->l_policy_data.l_flock.start; - } - - if (new->l_policy_data.l_flock.end > - lock->l_policy_data.l_flock.end) { - lock->l_policy_data.l_flock.end = - new->l_policy_data.l_flock.end; - } else { - new->l_policy_data.l_flock.end = - lock->l_policy_data.l_flock.end; - } - - if (added) { - ldlm_flock_destroy(lock, mode); - } else { - new = lock; - added = 1; - } - continue; - } - - if (new->l_policy_data.l_flock.start > - lock->l_policy_data.l_flock.end) - continue; - - if (new->l_policy_data.l_flock.end < - lock->l_policy_data.l_flock.start) - break; - - if (new->l_policy_data.l_flock.start <= - lock->l_policy_data.l_flock.start) { - if (new->l_policy_data.l_flock.end < - lock->l_policy_data.l_flock.end) { - lock->l_policy_data.l_flock.start = - new->l_policy_data.l_flock.end + 1; - break; - } - ldlm_flock_destroy(lock, lock->l_req_mode); - continue; - } - if (new->l_policy_data.l_flock.end >= - lock->l_policy_data.l_flock.end) { - lock->l_policy_data.l_flock.end = - new->l_policy_data.l_flock.start - 1; - continue; - } - - /* split the existing lock into two locks */ - - /* if this is an F_UNLCK operation then we could avoid - * allocating a new lock and use the req lock passed in - * with the request but this would complicate the reply - * processing since updates to req get reflected in the - * reply. The client side replays the lock request so - * it must see the original lock data in the reply. - */ - - /* XXX - if ldlm_lock_new() can sleep we should - * release the lr_lock, allocate the new lock, - * and restart processing this lock. 
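
[Editor's note] The overflow guard called out in the comment above can be shown in isolation: with 64-bit inclusive ranges, lock end + 1 wraps when the end is the EOF sentinel, and lock start - 1 underflows at zero, so both adjacency tests carry an explicit exclusion. A hedged standalone rendering, with OBD_OBJECT_EOF modelled as ~0ULL and all names invented here:

#include <stdio.h>
#include <stdint.h>

#define DEMO_OBJECT_EOF (~0ULL) /* stand-in for OBD_OBJECT_EOF */

/* Same-mode flocks merge when they overlap OR touch end-to-end.
 * The extra != tests keep end + 1 / start - 1 from wrapping. */
static int overlap_or_adjoin(uint64_t as, uint64_t ae,
			     uint64_t bs, uint64_t be)
{
	if (as > be + 1 && be != DEMO_OBJECT_EOF)
		return 0; /* a starts beyond b with a gap */
	if (ae < bs - 1 && bs != 0)
		return 0; /* a ends before b with a gap */
	return 1;
}

int main(void)
{
	printf("%d\n", overlap_or_adjoin(101, 200, 0, 100)); /* 1: adjoining */
	printf("%d\n", overlap_or_adjoin(102, 200, 0, 100)); /* 0: gap of one */
	printf("%d\n", overlap_or_adjoin(0, 10, 50, DEMO_OBJECT_EOF)); /* 0 */
	return 0;
}
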
- */ - if (!new2) { - unlock_res_and_lock(req); - new2 = ldlm_lock_create(ns, &res->lr_name, LDLM_FLOCK, - lock->l_granted_mode, &null_cbs, - NULL, 0, LVB_T_NONE); - lock_res_and_lock(req); - if (IS_ERR(new2)) { - ldlm_flock_destroy(req, lock->l_granted_mode); - return LDLM_ITER_STOP; - } - goto reprocess; - } - - splitted = 1; - - new2->l_granted_mode = lock->l_granted_mode; - new2->l_policy_data.l_flock.pid = - new->l_policy_data.l_flock.pid; - new2->l_policy_data.l_flock.owner = - new->l_policy_data.l_flock.owner; - new2->l_policy_data.l_flock.start = - lock->l_policy_data.l_flock.start; - new2->l_policy_data.l_flock.end = - new->l_policy_data.l_flock.start - 1; - lock->l_policy_data.l_flock.start = - new->l_policy_data.l_flock.end + 1; - new2->l_conn_export = lock->l_conn_export; - if (lock->l_export) { - new2->l_export = class_export_lock_get(lock->l_export, - new2); - if (new2->l_export->exp_lock_hash && - hlist_unhashed(&new2->l_exp_hash)) - cfs_hash_add(new2->l_export->exp_lock_hash, - &new2->l_remote_handle, - &new2->l_exp_hash); - } - ldlm_lock_addref_internal_nolock(new2, - lock->l_granted_mode); - - /* insert new2 at lock */ - ldlm_resource_add_lock(res, &lock->l_res_link, new2); - LDLM_LOCK_RELEASE(new2); - break; - } - - /* if new2 is created but never used, destroy it*/ - if (splitted == 0 && new2) - ldlm_lock_destroy_nolock(new2); - - /* At this point we're granting the lock request. */ - req->l_granted_mode = req->l_req_mode; - - if (!added) { - list_del_init(&req->l_res_link); - /* insert new lock before "lock", which might be the - * next lock for this owner, or might be the first - * lock for the next owner, or might not be a lock at - * all, but instead points at the head of the list - */ - ldlm_resource_add_lock(res, &lock->l_res_link, req); - } - - /* In case we're reprocessing the requested lock we can't destroy - * it until after calling ldlm_add_ast_work_item() above so that laawi() - * can bump the reference count on \a req. Otherwise \a req - * could be freed before the completion AST can be sent. - */ - if (added) - ldlm_flock_destroy(req, mode); - - ldlm_resource_dump(D_INFO, res); - return LDLM_ITER_CONTINUE; -} - -/** - * Flock completion callback function. - * - * \param lock [in,out]: A lock to be handled - * \param flags [in]: flags - * \param *data [in]: ldlm_work_cp_ast_lock() will use ldlm_cb_set_arg - * - * \retval 0 : success - * \retval <0 : failure - */ -int -ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) -{ - struct file_lock *getlk = lock->l_ast_data; - int rc = 0; - - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4); - if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT3)) { - lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_FAIL_LOC; - unlock_res_and_lock(lock); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT3, 4); - } - CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n", - flags, data, getlk); - - LASSERT(flags != LDLM_FL_WAIT_NOREPROC); - - if (flags & LDLM_FL_FAILED) - goto granted; - - if (!(flags & LDLM_FL_BLOCKED_MASK)) { - if (!data) - /* mds granted the lock in the reply */ - goto granted; - /* CP AST RPC: lock get granted, wake it up */ - wake_up(&lock->l_waitq); - return 0; - } - - LDLM_DEBUG(lock, - "client-side enqueue returned a blocked lock, sleeping"); - - /* Go to sleep until the lock is granted. 
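
[Editor's note] The split performed in the branch above reduces to simple range arithmetic: releasing [u_start, u_end] from the middle of a held [start, end] leaves two granted pieces, [start, u_start - 1] and [u_end + 1, end]. A minimal standalone check of that arithmetic (names are mine):

#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, end; };

/* Punch [u->start, u->end] out of the middle of *held, leaving the
 * low remainder in *lo and the high remainder in *hi, the same
 * arithmetic the new2 split above performs. Caller guarantees
 * held->start < u->start && u->end < held->end. */
static void split(const struct range *held, const struct range *u,
		  struct range *lo, struct range *hi)
{
	lo->start = held->start;
	lo->end = u->start - 1;
	hi->start = u->end + 1;
	hi->end = held->end;
}

int main(void)
{
	struct range held = { 0, 999 }, unlock = { 100, 199 }, lo, hi;

	split(&held, &unlock, &lo, &hi);
	printf("[%llu,%llu] and [%llu,%llu]\n",
	       (unsigned long long)lo.start, (unsigned long long)lo.end,
	       (unsigned long long)hi.start, (unsigned long long)hi.end);
	/* prints: [0,99] and [200,999] */
	return 0;
}
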
*/ - rc = l_wait_event_abortable(lock->l_waitq, is_granted_or_cancelled(lock)); - - if (rc) { - lock_res_and_lock(lock); - - /* client side - set flag to prevent lock from being put on LRU list */ - ldlm_set_cbpending(lock); - unlock_res_and_lock(lock); - - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - return rc; - } - -granted: - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10); - - if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT4)) { - lock_res_and_lock(lock); - /* DEADLOCK is always set with CBPENDING */ - lock->l_flags |= LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING; - unlock_res_and_lock(lock); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT4, 4); - } - if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT5)) { - lock_res_and_lock(lock); - /* DEADLOCK is always set with CBPENDING */ - lock->l_flags |= LDLM_FL_FAIL_LOC | - LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING; - unlock_res_and_lock(lock); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT5, 4); - } - - lock_res_and_lock(lock); - - /* - * Protect against race where lock could have been just destroyed - * due to overlap in ldlm_process_flock_lock(). - */ - if (ldlm_is_destroyed(lock)) { - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - /* - * An error is still to be returned, to propagate it up to - * ldlm_cli_enqueue_fini() caller. - */ - return -EIO; - } - - /* ldlm_lock_enqueue() has already placed lock on the granted list. */ - ldlm_resource_unlink_lock(lock); - - /* - * Import invalidation. We need to actually release the lock - * references being held, so that it can go away. No point in - * holding the lock even if app still believes it has it, since - * server already dropped it anyway. Only for granted locks too. - */ - /* Do the same for DEADLOCK'ed locks. */ - if (ldlm_is_failed(lock) || ldlm_is_flock_deadlock(lock)) { - int mode; - - if (flags & LDLM_FL_TEST_LOCK) - LASSERT(ldlm_is_test_lock(lock)); - - if (ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock)) - mode = getlk->fl_type; - else - mode = lock->l_granted_mode; - - if (ldlm_is_flock_deadlock(lock)) { - LDLM_DEBUG(lock, - "client-side enqueue deadlock received"); - rc = -EDEADLK; - } - ldlm_flock_destroy(lock, mode); - unlock_res_and_lock(lock); - - /* Need to wake up the waiter if we were evicted */ - wake_up(&lock->l_waitq); - - /* - * An error is still to be returned, to propagate it up to - * ldlm_cli_enqueue_fini() caller. - */ - return rc ? : -EIO; - } - - LDLM_DEBUG(lock, "client-side enqueue granted"); - - if (flags & LDLM_FL_TEST_LOCK) { - /* fcntl(F_GETLK) request */ - /* The old mode was saved in getlk->fl_type so that if the mode - * in the lock changes we can decref the appropriate refcount. - */ - LASSERT(ldlm_is_test_lock(lock)); - ldlm_flock_destroy(lock, getlk->fl_type); - switch (lock->l_granted_mode) { - case LCK_PR: - getlk->fl_type = F_RDLCK; - break; - case LCK_PW: - getlk->fl_type = F_WRLCK; - break; - default: - getlk->fl_type = F_UNLCK; - } - getlk->fl_pid = -(pid_t)lock->l_policy_data.l_flock.pid; - getlk->fl_start = (loff_t)lock->l_policy_data.l_flock.start; - getlk->fl_end = (loff_t)lock->l_policy_data.l_flock.end; - } else { - /* We need to reprocess the lock to do merges or splits - * with existing locks owned by this process. 
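
[Editor's note] For the F_GETLK path handled above, the granted LDLM mode is translated back into fcntl() terms and the conflicting owner's PID is negated to mark it as a process on another node. A hedged standalone sketch of just that translation (the mode enum below is a placeholder, not the real LCK_* constants):

#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>

enum demo_mode { DEMO_LCK_PR = 1, DEMO_LCK_PW = 2, DEMO_LCK_NL = 3 };

/* Translate a granted mode back into the fcntl lock type and mark
 * the conflicting owner as remote by negating its PID, as the
 * F_GETLK branch above does. */
static void fill_getlk(struct flock *fl, enum demo_mode granted,
		       pid_t remote_pid)
{
	switch (granted) {
	case DEMO_LCK_PR:
		fl->l_type = F_RDLCK;
		break;
	case DEMO_LCK_PW:
		fl->l_type = F_WRLCK;
		break;
	default:
		fl->l_type = F_UNLCK;
	}
	fl->l_pid = -remote_pid; /* negative => held on another node */
}

int main(void)
{
	struct flock fl = { 0 };

	fill_getlk(&fl, DEMO_LCK_PW, 4242);
	printf("type=%s pid=%d\n",
	       fl.l_type == F_WRLCK ? "F_WRLCK" : "other", fl.l_pid);
	return 0;
}
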
- */ - ldlm_process_flock_lock(lock); - } - unlock_res_and_lock(lock); - return rc; -} -EXPORT_SYMBOL(ldlm_flock_completion_ast); - -void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; - lpolicy->l_flock.end = wpolicy->l_flock.lfw_end; - lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid; - lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner; -} - -void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - memset(wpolicy, 0, sizeof(*wpolicy)); - wpolicy->l_flock.lfw_start = lpolicy->l_flock.start; - wpolicy->l_flock.lfw_end = lpolicy->l_flock.end; - wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid; - wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c deleted file mode 100644 index 2926208cdfa1..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_inodebits.c - * - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - */ - -/** - * This file contains implementation of IBITS lock type - * - * IBITS lock type contains a bit mask determining various properties of an - * object. The meanings of specific bits are specific to the caller and are - * opaque to LDLM code. - * - * Locks with intersecting bitmasks and conflicting lock modes (e.g. LCK_PW) - * are considered conflicting. See the lock mode compatibility matrix - * in lustre_dlm.h. 
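
[Editor's note] The IBITS conflict rule stated above composes two independent checks: the bit masks must intersect and the lock modes must be incompatible. A toy standalone version (the two-mode compatibility table here is illustrative, not the full matrix from lustre_dlm.h):

#include <stdio.h>
#include <stdint.h>

enum { DEMO_PR = 0, DEMO_PW = 1 }; /* toy modes: shared read, write */

/* Toy compatibility: PR is compatible with PR, nothing else is. */
static int mode_compat(int m1, int m2)
{
	return m1 == DEMO_PR && m2 == DEMO_PR;
}

/* IBITS locks conflict only when the masks intersect AND the modes
 * are incompatible; disjoint bits never conflict. */
static int ibits_conflict(uint64_t bits1, int m1, uint64_t bits2, int m2)
{
	return (bits1 & bits2) && !mode_compat(m1, m2);
}

int main(void)
{
	printf("%d\n", ibits_conflict(0x3, DEMO_PW, 0x4, DEMO_PW)); /* 0 */
	printf("%d\n", ibits_conflict(0x3, DEMO_PW, 0x2, DEMO_PR)); /* 1 */
	printf("%d\n", ibits_conflict(0x3, DEMO_PR, 0x2, DEMO_PR)); /* 0 */
	return 0;
}
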
- */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <lustre_dlm.h> -#include <obd_support.h> -#include <lustre_lib.h> -#include "ldlm_internal.h" - -void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits; -} - -void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - memset(wpolicy, 0, sizeof(*wpolicy)); - wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h deleted file mode 100644 index bc33ca100620..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h +++ /dev/null @@ -1,342 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define MAX_STRING_SIZE 128 - -extern int ldlm_srv_namespace_nr; -extern int ldlm_cli_namespace_nr; -extern struct mutex ldlm_srv_namespace_lock; -extern struct list_head ldlm_srv_namespace_list; -extern struct mutex ldlm_cli_namespace_lock; -extern struct list_head ldlm_cli_active_namespace_list; - -static inline int ldlm_namespace_nr_read(enum ldlm_side client) -{ - return client == LDLM_NAMESPACE_SERVER ? - ldlm_srv_namespace_nr : ldlm_cli_namespace_nr; -} - -static inline void ldlm_namespace_nr_inc(enum ldlm_side client) -{ - if (client == LDLM_NAMESPACE_SERVER) - ldlm_srv_namespace_nr++; - else - ldlm_cli_namespace_nr++; -} - -static inline void ldlm_namespace_nr_dec(enum ldlm_side client) -{ - if (client == LDLM_NAMESPACE_SERVER) - ldlm_srv_namespace_nr--; - else - ldlm_cli_namespace_nr--; -} - -static inline struct list_head *ldlm_namespace_list(enum ldlm_side client) -{ - return client == LDLM_NAMESPACE_SERVER ? - &ldlm_srv_namespace_list : &ldlm_cli_active_namespace_list; -} - -static inline struct mutex *ldlm_namespace_lock(enum ldlm_side client) -{ - return client == LDLM_NAMESPACE_SERVER ? 
- &ldlm_srv_namespace_lock : &ldlm_cli_namespace_lock; -} - -/* ns_bref is the number of resources in this namespace */ -static inline int ldlm_ns_empty(struct ldlm_namespace *ns) -{ - return atomic_read(&ns->ns_bref) == 0; -} - -void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns, - enum ldlm_side client); -void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, - enum ldlm_side client); -struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client); - -/* ldlm_request.c */ -/* Cancel lru flag, it indicates we cancel aged locks. */ -enum { - LDLM_LRU_FLAG_AGED = BIT(0), /* Cancel old non-LRU resize locks */ - LDLM_LRU_FLAG_PASSED = BIT(1), /* Cancel passed number of locks. */ - LDLM_LRU_FLAG_SHRINK = BIT(2), /* Cancel locks from shrinker. */ - LDLM_LRU_FLAG_LRUR = BIT(3), /* Cancel locks from lru resize. */ - LDLM_LRU_FLAG_NO_WAIT = BIT(4), /* Cancel locks w/o blocking (neither - * sending nor waiting for any rpcs) - */ - LDLM_LRU_FLAG_LRUR_NO_WAIT = BIT(5), /* LRUR + NO_WAIT */ -}; - -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, - enum ldlm_cancel_flags sync, int flags); -int ldlm_cancel_lru_local(struct ldlm_namespace *ns, - struct list_head *cancels, int count, int max, - enum ldlm_cancel_flags cancel_flags, int flags); -extern unsigned int ldlm_enqueue_min; -extern unsigned int ldlm_cancel_unused_locks_before_replay; - -/* ldlm_lock.c */ - -struct ldlm_cb_set_arg { - struct ptlrpc_request_set *set; - int type; /* LDLM_{CP,BL,GL}_CALLBACK */ - atomic_t restart; - struct list_head *list; - union ldlm_gl_desc *gl_desc; /* glimpse AST descriptor */ -}; - -enum ldlm_desc_ast_t { - LDLM_WORK_BL_AST, - LDLM_WORK_CP_AST, - LDLM_WORK_REVOKE_AST, - LDLM_WORK_GL_AST -}; - -void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list); -int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, - enum req_location loc, void *data, int size); -struct ldlm_lock * -ldlm_lock_create(struct ldlm_namespace *ns, const struct ldlm_res_id *id, - enum ldlm_type type, enum ldlm_mode mode, - const struct ldlm_callback_suite *cbs, - void *data, __u32 lvb_len, enum lvb_type lvb_type); -enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, - struct ldlm_lock **lock, void *cookie, - __u64 *flags); -void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode); -void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode); -void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode); -void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode); -int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list, - enum ldlm_desc_ast_t ast_type); -int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use); -#define ldlm_lock_remove_from_lru(lock) ldlm_lock_remove_from_lru_check(lock, 0) -int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock); -void ldlm_lock_destroy_nolock(struct ldlm_lock *lock); - -/* ldlm_lockd.c */ -int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct ldlm_lock *lock); -int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct list_head *cancels, int count, - enum ldlm_cancel_flags cancel_flags); -int ldlm_bl_thread_wakeup(void); - -void ldlm_handle_bl_callback(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, struct ldlm_lock *lock); - -extern struct kmem_cache *ldlm_resource_slab; -extern struct kset *ldlm_ns_kset; - -/* 
ldlm_lockd.c & ldlm_lock.c */ -extern struct kmem_cache *ldlm_lock_slab; - -/* ldlm_extent.c */ -void ldlm_extent_add_lock(struct ldlm_resource *res, struct ldlm_lock *lock); -void ldlm_extent_unlink_lock(struct ldlm_lock *lock); - -/* l_lock.c */ -void l_check_ns_lock(struct ldlm_namespace *ns); -void l_check_no_ns_lock(struct ldlm_namespace *ns); - -extern struct dentry *ldlm_svc_debugfs_dir; - -struct ldlm_state { - struct ptlrpc_service *ldlm_cb_service; - struct ptlrpc_service *ldlm_cancel_service; - struct ptlrpc_client *ldlm_client; - struct ptlrpc_connection *ldlm_server_conn; - struct ldlm_bl_pool *ldlm_bl_pool; -}; - -/* ldlm_pool.c */ -__u64 ldlm_pool_get_slv(struct ldlm_pool *pl); -void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv); -__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl); - -/* interval tree, for LDLM_EXTENT. */ -extern struct kmem_cache *ldlm_interval_slab; /* slab cache for ldlm_interval */ -struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l); -struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock); -void ldlm_interval_free(struct ldlm_interval *node); -/* this function must be called with res lock held */ -static inline struct ldlm_extent * -ldlm_interval_extent(struct ldlm_interval *node) -{ - struct ldlm_lock *lock; - - LASSERT(!list_empty(&node->li_group)); - - lock = list_entry(node->li_group.next, struct ldlm_lock, l_sl_policy); - return &lock->l_policy_data.l_extent; -} - -int ldlm_init(void); -void ldlm_exit(void); - -enum ldlm_policy_res { - LDLM_POLICY_CANCEL_LOCK, - LDLM_POLICY_KEEP_LOCK, - LDLM_POLICY_SKIP_LOCK -}; - -#define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v) -#define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; } -#define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v) -#define LDLM_POOL_SYSFS_SET_u64(a, b) { a = b; } -#define LDLM_POOL_SYSFS_PRINT_atomic(v) sprintf(buf, "%d\n", atomic_read(&v)) -#define LDLM_POOL_SYSFS_SET_atomic(a, b) atomic_set(&a, b) - -#define LDLM_POOL_SYSFS_READER_SHOW(var, type) \ - static ssize_t var##_show(struct kobject *kobj, \ - struct attribute *attr, \ - char *buf) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - type tmp; \ - \ - spin_lock(&pl->pl_lock); \ - tmp = pl->pl_##var; \ - spin_unlock(&pl->pl_lock); \ - \ - return LDLM_POOL_SYSFS_PRINT_##type(tmp); \ - } \ - struct __##var##__dummy_read {; } /* semicolon catcher */ - -#define LDLM_POOL_SYSFS_WRITER_STORE(var, type) \ - static ssize_t var##_store(struct kobject *kobj, \ - struct attribute *attr, \ - const char *buffer, \ - size_t count) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - unsigned long tmp; \ - int rc; \ - \ - rc = kstrtoul(buffer, 10, &tmp); \ - if (rc < 0) { \ - return rc; \ - } \ - \ - spin_lock(&pl->pl_lock); \ - LDLM_POOL_SYSFS_SET_##type(pl->pl_##var, tmp); \ - spin_unlock(&pl->pl_lock); \ - \ - return count; \ - } \ - struct __##var##__dummy_write {; } /* semicolon catcher */ - -#define LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(var, type) \ - static ssize_t var##_show(struct kobject *kobj, \ - struct attribute *attr, \ - char *buf) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - \ - return LDLM_POOL_SYSFS_PRINT_##type(pl->pl_##var); \ - } \ - struct __##var##__dummy_read {; } /* semicolon catcher */ - -#define LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(var, type) \ - static ssize_t var##_store(struct kobject *kobj, \ - struct attribute *attr, \ - const char *buffer, \ - size_t count) \ - { \ - 
struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - unsigned long tmp; \ - int rc; \ - \ - rc = kstrtoul(buffer, 10, &tmp); \ - if (rc < 0) { \ - return rc; \ - } \ - \ - LDLM_POOL_SYSFS_SET_##type(pl->pl_##var, tmp); \ - \ - return count; \ - } \ - struct __##var##__dummy_write {; } /* semicolon catcher */ - -static inline int is_granted_or_cancelled(struct ldlm_lock *lock) -{ - int ret = 0; - - lock_res_and_lock(lock); - if ((lock->l_req_mode == lock->l_granted_mode) && - !ldlm_is_cp_reqd(lock)) - ret = 1; - else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock)) - ret = 1; - unlock_res_and_lock(lock); - - return ret; -} - -typedef void (*ldlm_policy_wire_to_local_t)(const union ldlm_wire_policy_data *, - union ldlm_policy_data *); - -typedef void (*ldlm_policy_local_to_wire_t)(const union ldlm_policy_data *, - union ldlm_wire_policy_data *); - -void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); -void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); -void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); -void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); - -static inline bool ldlm_res_eq(const struct ldlm_res_id *res0, - const struct ldlm_res_id *res1) -{ - return memcmp(res0, res1, sizeof(*res0)) == 0; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c deleted file mode 100644 index 9efd26ec59dd..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ /dev/null @@ -1,843 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** - * This file deals with various client/target related logic including recovery. 
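
[Editor's note] The LDLM_POOL_SYSFS_READER_SHOW/STORE family above relies on token pasting to stamp out one sysfs accessor per pool field. Here is a minimal userspace imitation of that pattern (the toy struct, macro names, and fields are all mine) showing how ##var splices the member name and ##type selects the print helper at expansion time:

#include <stdio.h>

typedef unsigned long long u64;
struct pool { int pl_granted; u64 pl_volume; };

#define DEMO_PRINT_int(v) printf("%d\n", v)
#define DEMO_PRINT_u64(v) printf("%llu\n", (u64)(v))

/* Same token-pasting trick as the kernel macros: one macro stamps out
 * a per-field accessor, dispatching on ##type for the format. */
#define DEMO_READER(var, type)					\
	static void var##_show(const struct pool *p)		\
	{							\
		type tmp = p->pl_##var;				\
		DEMO_PRINT_##type(tmp);				\
	}							\
	struct __##var##__dummy_read { int unused; } /* semicolon catcher */

DEMO_READER(granted, int);
DEMO_READER(volume, u64);

int main(void)
{
	struct pool p = { 42, 123456789ULL };

	granted_show(&p);
	volume_show(&p);
	return 0;
}

The trailing dummy struct plays the same "semicolon catcher" role as in the original macros: it lets each DEMO_READER(...) invocation end with a semicolon without leaving a stray empty statement.
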
- * - * TODO: This code more logically belongs in the ptlrpc module than in ldlm and - * should be moved. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <linux/libcfs/libcfs.h> -#include <obd.h> -#include <obd_class.h> -#include <lustre_dlm.h> -#include <lustre_net.h> -#include <lustre_sec.h> -#include "ldlm_internal.h" - -/* @priority: If non-zero, move the selected connection to the list head. - * @create: If zero, only search in existing connections. - */ -static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority, int create) -{ - struct ptlrpc_connection *ptlrpc_conn; - struct obd_import_conn *imp_conn = NULL, *item; - int rc = 0; - - if (!create && !priority) { - CDEBUG(D_HA, "Nothing to do\n"); - return -EINVAL; - } - - ptlrpc_conn = ptlrpc_uuid_to_connection(uuid); - if (!ptlrpc_conn) { - CDEBUG(D_HA, "can't find connection %s\n", uuid->uuid); - return -ENOENT; - } - - if (create) { - imp_conn = kzalloc(sizeof(*imp_conn), GFP_NOFS); - if (!imp_conn) { - rc = -ENOMEM; - goto out_put; - } - } - - spin_lock(&imp->imp_lock); - list_for_each_entry(item, &imp->imp_conn_list, oic_item) { - if (obd_uuid_equals(uuid, &item->oic_uuid)) { - if (priority) { - list_del(&item->oic_item); - list_add(&item->oic_item, - &imp->imp_conn_list); - item->oic_last_attempt = 0; - } - CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n", - imp, imp->imp_obd->obd_name, uuid->uuid, - (priority ? ", moved to head" : "")); - spin_unlock(&imp->imp_lock); - rc = 0; - goto out_free; - } - } - /* No existing import connection found for \a uuid. */ - if (create) { - imp_conn->oic_conn = ptlrpc_conn; - imp_conn->oic_uuid = *uuid; - imp_conn->oic_last_attempt = 0; - if (priority) - list_add(&imp_conn->oic_item, &imp->imp_conn_list); - else - list_add_tail(&imp_conn->oic_item, - &imp->imp_conn_list); - CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n", - imp, imp->imp_obd->obd_name, uuid->uuid, - (priority ? 
"head" : "tail")); - } else { - spin_unlock(&imp->imp_lock); - rc = -ENOENT; - goto out_free; - } - - spin_unlock(&imp->imp_lock); - return 0; -out_free: - kfree(imp_conn); -out_put: - ptlrpc_connection_put(ptlrpc_conn); - return rc; -} - -int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid) -{ - return import_set_conn(imp, uuid, 1, 0); -} - -int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority) -{ - return import_set_conn(imp, uuid, priority, 1); -} -EXPORT_SYMBOL(client_import_add_conn); - -int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) -{ - struct obd_import_conn *imp_conn; - struct obd_export *dlmexp; - int rc = -ENOENT; - - spin_lock(&imp->imp_lock); - if (list_empty(&imp->imp_conn_list)) { - LASSERT(!imp->imp_connection); - goto out; - } - - list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) { - if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid)) - continue; - LASSERT(imp_conn->oic_conn); - - if (imp_conn == imp->imp_conn_current) { - LASSERT(imp_conn->oic_conn == imp->imp_connection); - - if (imp->imp_state != LUSTRE_IMP_CLOSED && - imp->imp_state != LUSTRE_IMP_DISCON) { - CERROR("can't remove current connection\n"); - rc = -EBUSY; - goto out; - } - - ptlrpc_connection_put(imp->imp_connection); - imp->imp_connection = NULL; - - dlmexp = class_conn2export(&imp->imp_dlm_handle); - if (dlmexp && dlmexp->exp_connection) { - LASSERT(dlmexp->exp_connection == - imp_conn->oic_conn); - ptlrpc_connection_put(dlmexp->exp_connection); - dlmexp->exp_connection = NULL; - } - - if (dlmexp) - class_export_put(dlmexp); - } - - list_del(&imp_conn->oic_item); - ptlrpc_connection_put(imp_conn->oic_conn); - kfree(imp_conn); - CDEBUG(D_HA, "imp %p@%s: remove connection %s\n", - imp, imp->imp_obd->obd_name, uuid->uuid); - rc = 0; - break; - } -out: - spin_unlock(&imp->imp_lock); - if (rc == -ENOENT) - CERROR("connection %s not found\n", uuid->uuid); - return rc; -} -EXPORT_SYMBOL(client_import_del_conn); - -/** - * Find conn UUID by peer NID. \a peer is a server NID. This function is used - * to find a conn uuid of \a imp which can reach \a peer. - */ -int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer, - struct obd_uuid *uuid) -{ - struct obd_import_conn *conn; - int rc = -ENOENT; - - spin_lock(&imp->imp_lock); - list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { - /* Check if conn UUID does have this peer NID. */ - if (class_check_uuid(&conn->oic_uuid, peer)) { - *uuid = conn->oic_uuid; - rc = 0; - break; - } - } - spin_unlock(&imp->imp_lock); - return rc; -} -EXPORT_SYMBOL(client_import_find_conn); - -void client_destroy_import(struct obd_import *imp) -{ - /* Drop security policy instance after all RPCs have finished/aborted - * to let all busy contexts be released. - */ - class_import_get(imp); - class_destroy_import(imp); - sptlrpc_import_sec_put(imp); - class_import_put(imp); -} -EXPORT_SYMBOL(client_destroy_import); - -/* Configure an RPC client OBD device. 
- * - * lcfg parameters: - * 1 - client UUID - * 2 - server UUID - * 3 - inactive-on-startup - */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) -{ - struct client_obd *cli = &obddev->u.cli; - struct obd_import *imp; - struct obd_uuid server_uuid; - int rq_portal, rp_portal, connect_op; - char *name = obddev->obd_type->typ_name; - enum ldlm_ns_type ns_type = LDLM_NS_TYPE_UNKNOWN; - int rc; - - /* In a more perfect world, we would hang a ptlrpc_client off of - * obd_type and just use the values from there. - */ - if (!strcmp(name, LUSTRE_OSC_NAME)) { - rq_portal = OST_REQUEST_PORTAL; - rp_portal = OSC_REPLY_PORTAL; - connect_op = OST_CONNECT; - cli->cl_sp_me = LUSTRE_SP_CLI; - cli->cl_sp_to = LUSTRE_SP_OST; - ns_type = LDLM_NS_TYPE_OSC; - } else if (!strcmp(name, LUSTRE_MDC_NAME) || - !strcmp(name, LUSTRE_LWP_NAME)) { - rq_portal = MDS_REQUEST_PORTAL; - rp_portal = MDC_REPLY_PORTAL; - connect_op = MDS_CONNECT; - cli->cl_sp_me = LUSTRE_SP_CLI; - cli->cl_sp_to = LUSTRE_SP_MDT; - ns_type = LDLM_NS_TYPE_MDC; - } else if (!strcmp(name, LUSTRE_MGC_NAME)) { - rq_portal = MGS_REQUEST_PORTAL; - rp_portal = MGC_REPLY_PORTAL; - connect_op = MGS_CONNECT; - cli->cl_sp_me = LUSTRE_SP_MGC; - cli->cl_sp_to = LUSTRE_SP_MGS; - cli->cl_flvr_mgc.sf_rpc = SPTLRPC_FLVR_INVALID; - ns_type = LDLM_NS_TYPE_MGC; - } else { - CERROR("unknown client OBD type \"%s\", can't setup\n", - name); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { - CERROR("requires a TARGET UUID\n"); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) > 37) { - CERROR("client UUID must be less than 38 characters\n"); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) { - CERROR("setup requires a SERVER UUID\n"); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 2) > 37) { - CERROR("target UUID must be less than 38 characters\n"); - return -EINVAL; - } - - init_rwsem(&cli->cl_sem); - cli->cl_conn_count = 0; - memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2), - min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2), - sizeof(server_uuid))); - - cli->cl_dirty_pages = 0; - cli->cl_avail_grant = 0; - /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */ - /* - * cl_dirty_max_pages may be changed at connect time in - * ptlrpc_connect_interpret(). - */ - client_adjust_max_dirty(cli); - INIT_LIST_HEAD(&cli->cl_cache_waiters); - INIT_LIST_HEAD(&cli->cl_loi_ready_list); - INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); - INIT_LIST_HEAD(&cli->cl_loi_write_list); - INIT_LIST_HEAD(&cli->cl_loi_read_list); - spin_lock_init(&cli->cl_loi_list_lock); - atomic_set(&cli->cl_pending_w_pages, 0); - atomic_set(&cli->cl_pending_r_pages, 0); - cli->cl_r_in_flight = 0; - cli->cl_w_in_flight = 0; - - spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); - spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); - spin_lock_init(&cli->cl_read_page_hist.oh_lock); - spin_lock_init(&cli->cl_write_page_hist.oh_lock); - spin_lock_init(&cli->cl_read_offset_hist.oh_lock); - spin_lock_init(&cli->cl_write_offset_hist.oh_lock); - - /* lru for osc. */ - INIT_LIST_HEAD(&cli->cl_lru_osc); - atomic_set(&cli->cl_lru_shrinkers, 0); - atomic_long_set(&cli->cl_lru_busy, 0); - atomic_long_set(&cli->cl_lru_in_list, 0); - INIT_LIST_HEAD(&cli->cl_lru_list); - spin_lock_init(&cli->cl_lru_list_lock); - atomic_long_set(&cli->cl_unstable_count, 0); - INIT_LIST_HEAD(&cli->cl_shrink_list); - - init_waitqueue_head(&cli->cl_destroy_waitq); - atomic_set(&cli->cl_destroy_in_flight, 0); - /* Turn on checksumming by default. 
*/ - cli->cl_checksum = 1; - /* - * The supported checksum types will be worked out at connect time - * Set cl_chksum* to CRC32 for now to avoid returning screwed info - * through procfs. - */ - cli->cl_cksum_type = OBD_CKSUM_CRC32; - cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; - atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); - - /* - * Set it to possible maximum size. It may be reduced by ocd_brw_size - * from OFD after connecting. - */ - cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES; - - /* - * set cl_chunkbits default value to PAGE_CACHE_SHIFT, - * it will be updated at OSC connection time. - */ - cli->cl_chunkbits = PAGE_SHIFT; - - if (!strcmp(name, LUSTRE_MDC_NAME)) - cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; - else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) - cli->cl_max_rpcs_in_flight = 2; - else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) - cli->cl_max_rpcs_in_flight = 3; - else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) - cli->cl_max_rpcs_in_flight = 4; - else - cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; - - spin_lock_init(&cli->cl_mod_rpcs_lock); - spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock); - cli->cl_max_mod_rpcs_in_flight = 0; - cli->cl_mod_rpcs_in_flight = 0; - cli->cl_close_rpcs_in_flight = 0; - init_waitqueue_head(&cli->cl_mod_rpcs_waitq); - cli->cl_mod_tag_bitmap = NULL; - - if (connect_op == MDS_CONNECT) { - cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1; - cli->cl_mod_tag_bitmap = kcalloc(BITS_TO_LONGS(OBD_MAX_RIF_MAX), - sizeof(long), GFP_NOFS); - if (!cli->cl_mod_tag_bitmap) { - rc = -ENOMEM; - goto err; - } - } - - rc = ldlm_get_ref(); - if (rc) { - CERROR("ldlm_get_ref failed: %d\n", rc); - goto err; - } - - ptlrpc_init_client(rq_portal, rp_portal, name, - &obddev->obd_ldlm_client); - - imp = class_new_import(obddev); - if (!imp) { - rc = -ENOENT; - goto err_ldlm; - } - imp->imp_client = &obddev->obd_ldlm_client; - imp->imp_connect_op = connect_op; - memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1), - LUSTRE_CFG_BUFLEN(lcfg, 1)); - class_import_put(imp); - - rc = client_import_add_conn(imp, &server_uuid, 1); - if (rc) { - CERROR("can't add initial connection\n"); - goto err_import; - } - - cli->cl_import = imp; - /* cli->cl_max_mds_easize updated by mdc_init_ea_size() */ - cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3); - - if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { - if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) { - CDEBUG(D_HA, "marking %s %s->%s as inactive\n", - name, obddev->obd_name, - cli->cl_target_uuid.uuid); - spin_lock(&imp->imp_lock); - imp->imp_deactive = 1; - spin_unlock(&imp->imp_lock); - } - } - - obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name, - LDLM_NAMESPACE_CLIENT, - LDLM_NAMESPACE_GREEDY, - ns_type); - if (!obddev->obd_namespace) { - CERROR("Unable to create client namespace - %s\n", - obddev->obd_name); - rc = -ENOMEM; - goto err_import; - } - - return rc; - -err_import: - class_destroy_import(imp); -err_ldlm: - ldlm_put_ref(); -err: - kfree(cli->cl_mod_tag_bitmap); - cli->cl_mod_tag_bitmap = NULL; - return rc; -} -EXPORT_SYMBOL(client_obd_setup); - -int client_obd_cleanup(struct obd_device *obddev) -{ - struct client_obd *cli = &obddev->u.cli; - - ldlm_namespace_free_post(obddev->obd_namespace); - obddev->obd_namespace = NULL; - - obd_cleanup_client_import(obddev); - LASSERT(!obddev->u.cli.cl_import); - - ldlm_put_ref(); - - kfree(cli->cl_mod_tag_bitmap); - cli->cl_mod_tag_bitmap = NULL; - - return 0; -} 
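
client_obd_setup() and client_obd_cleanup() were reached through the obd_ops method table of the client device types; a hedged sketch of that wiring, assuming the o_setup/o_cleanup method signatures accept these helpers directly (the ops struct itself is hypothetical):

        static struct obd_ops sketch_obd_ops = {
                .o_owner   = THIS_MODULE,
                .o_setup   = client_obd_setup,   /* parse lcfg, build import */
                .o_cleanup = client_obd_cleanup, /* free namespace and import */
        };
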
-EXPORT_SYMBOL(client_obd_cleanup); - -/* ->o_connect() method for client side (OSC and MDC and MGC) */ -int client_connect_import(const struct lu_env *env, - struct obd_export **exp, - struct obd_device *obd, struct obd_uuid *cluuid, - struct obd_connect_data *data, void *localdata) -{ - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; - struct obd_connect_data *ocd; - struct lustre_handle conn = { 0 }; - bool is_mdc = false; - int rc; - - *exp = NULL; - down_write(&cli->cl_sem); - if (cli->cl_conn_count > 0) { - rc = -EALREADY; - goto out_sem; - } - - rc = class_connect(&conn, obd, cluuid); - if (rc) - goto out_sem; - - cli->cl_conn_count++; - *exp = class_conn2export(&conn); - - LASSERT(obd->obd_namespace); - - imp->imp_dlm_handle = conn; - rc = ptlrpc_init_import(imp); - if (rc != 0) - goto out_ldlm; - - ocd = &imp->imp_connect_data; - if (data) { - *ocd = *data; - is_mdc = !strncmp(imp->imp_obd->obd_type->typ_name, - LUSTRE_MDC_NAME, 3); - if (is_mdc) - data->ocd_connect_flags |= OBD_CONNECT_MULTIMODRPCS; - imp->imp_connect_flags_orig = data->ocd_connect_flags; - } - - rc = ptlrpc_connect_import(imp); - if (rc != 0) { - if (data && is_mdc) - data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS; - LASSERT(imp->imp_state == LUSTRE_IMP_DISCON); - goto out_ldlm; - } - LASSERT(*exp && (*exp)->exp_connection); - - if (data) { - LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) == - ocd->ocd_connect_flags, "old %#llx, new %#llx\n", - data->ocd_connect_flags, ocd->ocd_connect_flags); - data->ocd_connect_flags = ocd->ocd_connect_flags; - /* clear the flag as it was not set and is not known - * by upper layers - */ - if (is_mdc) - data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS; - } - - ptlrpc_pinger_add_import(imp); - - if (rc) { -out_ldlm: - cli->cl_conn_count--; - class_disconnect(*exp); - *exp = NULL; - } -out_sem: - up_write(&cli->cl_sem); - - return rc; -} -EXPORT_SYMBOL(client_connect_import); - -int client_disconnect_export(struct obd_export *exp) -{ - struct obd_device *obd = class_exp2obd(exp); - struct client_obd *cli; - struct obd_import *imp; - int rc = 0, err; - - if (!obd) { - CERROR("invalid export for disconnect: exp %p cookie %#llx\n", - exp, exp ? exp->exp_handle.h_cookie : -1); - return -EINVAL; - } - - cli = &obd->u.cli; - imp = cli->cl_import; - - down_write(&cli->cl_sem); - CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name, - cli->cl_conn_count); - - if (!cli->cl_conn_count) { - CERROR("disconnecting disconnected device (%s)\n", - obd->obd_name); - rc = -EINVAL; - goto out_disconnect; - } - - cli->cl_conn_count--; - if (cli->cl_conn_count) { - rc = 0; - goto out_disconnect; - } - - /* Mark import deactivated now, so we don't try to reconnect if any - * of the cleanup RPCs fails (e.g. LDLM cancel, etc). We don't - * fully deactivate the import, or that would drop all requests. - */ - spin_lock(&imp->imp_lock); - imp->imp_deactive = 1; - spin_unlock(&imp->imp_lock); - - /* Some non-replayable imports (MDS's OSCs) are pinged, so just - * delete it regardless. (It's safe to delete an import that was - * never added.) - */ - (void)ptlrpc_pinger_del_import(imp); - - if (obd->obd_namespace) { - /* obd_force == local only */ - ldlm_cli_cancel_unused(obd->obd_namespace, NULL, - obd->obd_force ? LCF_LOCAL : 0, NULL); - ldlm_namespace_free_prior(obd->obd_namespace, imp, - obd->obd_force); - } - - /* There's no need to hold sem while disconnecting an import, - * and it may actually cause deadlock in GSS. 
- */ - up_write(&cli->cl_sem); - rc = ptlrpc_disconnect_import(imp, 0); - down_write(&cli->cl_sem); - - ptlrpc_invalidate_import(imp); - -out_disconnect: - /* Use server style - class_disconnect should be always called for - * o_disconnect. - */ - err = class_disconnect(exp); - if (!rc && err) - rc = err; - - up_write(&cli->cl_sem); - - return rc; -} -EXPORT_SYMBOL(client_disconnect_export); - -/** - * Packs current SLV and Limit into \a req. - */ -int target_pack_pool_reply(struct ptlrpc_request *req) -{ - struct obd_device *obd; - - /* Check that we still have all structures alive as this may - * be some late RPC at shutdown time. - */ - if (unlikely(!req->rq_export || !req->rq_export->exp_obd || - !exp_connect_lru_resize(req->rq_export))) { - lustre_msg_set_slv(req->rq_repmsg, 0); - lustre_msg_set_limit(req->rq_repmsg, 0); - return 0; - } - - /* OBD is alive here as export is alive, which we checked above. */ - obd = req->rq_export->exp_obd; - - read_lock(&obd->obd_pool_lock); - lustre_msg_set_slv(req->rq_repmsg, obd->obd_pool_slv); - lustre_msg_set_limit(req->rq_repmsg, obd->obd_pool_limit); - read_unlock(&obd->obd_pool_lock); - - return 0; -} -EXPORT_SYMBOL(target_pack_pool_reply); - -static int -target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id) -{ - if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) { - DEBUG_REQ(D_ERROR, req, "dropping reply"); - return -ECOMM; - } - - if (unlikely(rc)) { - DEBUG_REQ(D_NET, req, "processing error (%d)", rc); - req->rq_status = rc; - return ptlrpc_send_error(req, 1); - } - - DEBUG_REQ(D_NET, req, "sending reply"); - return ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT); -} - -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) -{ - struct ptlrpc_service_part *svcpt; - int netrc; - struct ptlrpc_reply_state *rs; - struct obd_export *exp; - - if (req->rq_no_reply) - return; - - svcpt = req->rq_rqbd->rqbd_svcpt; - rs = req->rq_reply_state; - if (!rs || !rs->rs_difficult) { - /* no notifiers */ - target_send_reply_msg(req, rc, fail_id); - return; - } - - /* must be an export if locks saved */ - LASSERT(req->rq_export); - /* req/reply consistent */ - LASSERT(rs->rs_svcpt == svcpt); - - /* "fresh" reply */ - LASSERT(!rs->rs_scheduled); - LASSERT(!rs->rs_scheduled_ever); - LASSERT(!rs->rs_handled); - LASSERT(!rs->rs_on_net); - LASSERT(!rs->rs_export); - LASSERT(list_empty(&rs->rs_obd_list)); - LASSERT(list_empty(&rs->rs_exp_list)); - - exp = class_export_get(req->rq_export); - - /* disable reply scheduling while I'm setting up */ - rs->rs_scheduled = 1; - rs->rs_on_net = 1; - rs->rs_xid = req->rq_xid; - rs->rs_transno = req->rq_transno; - rs->rs_export = exp; - rs->rs_opc = lustre_msg_get_opc(req->rq_reqmsg); - - spin_lock(&exp->exp_uncommitted_replies_lock); - CDEBUG(D_NET, "rs transno = %llu, last committed = %llu\n", - rs->rs_transno, exp->exp_last_committed); - if (rs->rs_transno > exp->exp_last_committed) { - /* not committed already */ - list_add_tail(&rs->rs_obd_list, - &exp->exp_uncommitted_replies); - } - spin_unlock(&exp->exp_uncommitted_replies_lock); - - spin_lock(&exp->exp_lock); - list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies); - spin_unlock(&exp->exp_lock); - - netrc = target_send_reply_msg(req, rc, fail_id); - - spin_lock(&svcpt->scp_rep_lock); - - atomic_inc(&svcpt->scp_nreps_difficult); - - if (netrc != 0) { - /* error sending: reply is off the net. 
Also we need +1 - * reply ref until ptlrpc_handle_rs() is done - * with the reply state (if the send was successful, there - * would have been +1 ref for the net, which - * reply_out_callback leaves alone) - */ - rs->rs_on_net = 0; - ptlrpc_rs_addref(rs); - } - - spin_lock(&rs->rs_lock); - if (rs->rs_transno <= exp->exp_last_committed || - (!rs->rs_on_net && !rs->rs_no_ack) || - list_empty(&rs->rs_exp_list) || /* completed already */ - list_empty(&rs->rs_obd_list)) { - CDEBUG(D_HA, "Schedule reply immediately\n"); - ptlrpc_dispatch_difficult_reply(rs); - } else { - list_add(&rs->rs_list, &svcpt->scp_rep_active); - rs->rs_scheduled = 0; /* allow notifier to schedule */ - } - spin_unlock(&rs->rs_lock); - spin_unlock(&svcpt->scp_rep_lock); -} -EXPORT_SYMBOL(target_send_reply); - -enum ldlm_mode lck_compat_array[] = { - [LCK_EX] = LCK_COMPAT_EX, - [LCK_PW] = LCK_COMPAT_PW, - [LCK_PR] = LCK_COMPAT_PR, - [LCK_CW] = LCK_COMPAT_CW, - [LCK_CR] = LCK_COMPAT_CR, - [LCK_NL] = LCK_COMPAT_NL, - [LCK_GROUP] = LCK_COMPAT_GROUP, - [LCK_COS] = LCK_COMPAT_COS, -}; - -/** - * Rather arbitrary mapping from LDLM error codes to errno values. This should - * not escape to the user level. - */ -int ldlm_error2errno(enum ldlm_error error) -{ - int result; - - switch (error) { - case ELDLM_OK: - case ELDLM_LOCK_MATCHED: - result = 0; - break; - case ELDLM_LOCK_CHANGED: - result = -ESTALE; - break; - case ELDLM_LOCK_ABORTED: - result = -ENAVAIL; - break; - case ELDLM_LOCK_REPLACED: - result = -ESRCH; - break; - case ELDLM_NO_LOCK_DATA: - result = -ENOENT; - break; - case ELDLM_NAMESPACE_EXISTS: - result = -EEXIST; - break; - case ELDLM_BAD_NAMESPACE: - result = -EBADF; - break; - default: - if (((int)error) < 0) /* cast to signed type */ - result = error; /* as enum ldlm_error can be unsigned */ - else { - CERROR("Invalid DLM result code: %d\n", error); - result = -EPROTO; - } - } - return result; -} -EXPORT_SYMBOL(ldlm_error2errno); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS -void ldlm_dump_export_locks(struct obd_export *exp) -{ - spin_lock(&exp->exp_locks_list_guard); - if (!list_empty(&exp->exp_locks_list)) { - struct ldlm_lock *lock; - - CERROR("dumping locks for export %p,ignore if the unmount doesn't hang\n", - exp); - list_for_each_entry(lock, &exp->exp_locks_list, - l_exp_refs_link) - LDLM_ERROR(lock, "lock:"); - } - spin_unlock(&exp->exp_locks_list_guard); -} -#endif diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c deleted file mode 100644 index 95bea351d21d..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ /dev/null @@ -1,2146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_lock.c - * - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <linux/libcfs/libcfs.h> -#include <lustre_intent.h> -#include <lustre_swab.h> -#include <obd_class.h> -#include "ldlm_internal.h" - -/* lock types */ -char *ldlm_lockname[] = { - [0] = "--", - [LCK_EX] = "EX", - [LCK_PW] = "PW", - [LCK_PR] = "PR", - [LCK_CW] = "CW", - [LCK_CR] = "CR", - [LCK_NL] = "NL", - [LCK_GROUP] = "GROUP", - [LCK_COS] = "COS", -}; -EXPORT_SYMBOL(ldlm_lockname); - -static char *ldlm_typename[] = { - [LDLM_PLAIN] = "PLN", - [LDLM_EXTENT] = "EXT", - [LDLM_FLOCK] = "FLK", - [LDLM_IBITS] = "IBT", -}; - -static ldlm_policy_wire_to_local_t ldlm_policy_wire_to_local[] = { - [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local, - [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local, - [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire_to_local, - [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local, -}; - -static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = { - [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_local_to_wire, - [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_local_to_wire, - [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_local_to_wire, - [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_local_to_wire, -}; - -/** - * Converts lock policy from local format to on the wire lock_desc format - */ -static void ldlm_convert_policy_to_wire(enum ldlm_type type, - const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - ldlm_policy_local_to_wire_t convert; - - convert = ldlm_policy_local_to_wire[type - LDLM_MIN_TYPE]; - - convert(lpolicy, wpolicy); -} - -/** - * Converts lock policy from on the wire lock_desc format to local format - */ -void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - ldlm_policy_wire_to_local_t convert; - - convert = ldlm_policy_wire_to_local[type - LDLM_MIN_TYPE]; - - convert(wpolicy, lpolicy); -} - -const char *ldlm_it2str(enum ldlm_intent_flags it) -{ - switch (it) { - case IT_OPEN: - return "open"; - case IT_CREAT: - return "creat"; - case (IT_OPEN | IT_CREAT): - return "open|creat"; - case IT_READDIR: - return "readdir"; - case IT_GETATTR: - return "getattr"; - case IT_LOOKUP: - return "lookup"; - case IT_UNLINK: - return "unlink"; - case IT_GETXATTR: - return "getxattr"; - case IT_LAYOUT: - return "layout"; - default: - CERROR("Unknown intent 0x%08x\n", it); - return "UNKNOWN"; - } -} -EXPORT_SYMBOL(ldlm_it2str); - -/* - * REFCOUNTED LOCK OBJECTS - */ - -/** - * Get a reference on a lock. 
- * - * Lock refcounts, during creation: - * - one special one for allocation, dec'd only once in destroy - * - one for being a lock that's in-use - * - one for the addref associated with a new lock - */ -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock) -{ - atomic_inc(&lock->l_refc); - return lock; -} -EXPORT_SYMBOL(ldlm_lock_get); - -/** - * Release lock reference. - * - * Also frees the lock if it was last reference. - */ -void ldlm_lock_put(struct ldlm_lock *lock) -{ - LASSERT(lock->l_resource != LP_POISON); - LASSERT(atomic_read(&lock->l_refc) > 0); - if (atomic_dec_and_test(&lock->l_refc)) { - struct ldlm_resource *res; - - LDLM_DEBUG(lock, - "final lock_put on destroyed lock, freeing it."); - - res = lock->l_resource; - LASSERT(ldlm_is_destroyed(lock)); - LASSERT(list_empty(&lock->l_res_link)); - LASSERT(list_empty(&lock->l_pending_chain)); - - lprocfs_counter_decr(ldlm_res_to_ns(res)->ns_stats, - LDLM_NSS_LOCKS); - lu_ref_del(&res->lr_reference, "lock", lock); - ldlm_resource_putref(res); - lock->l_resource = NULL; - if (lock->l_export) { - class_export_lock_put(lock->l_export, lock); - lock->l_export = NULL; - } - - kfree(lock->l_lvb_data); - - ldlm_interval_free(ldlm_interval_detach(lock)); - lu_ref_fini(&lock->l_reference); - OBD_FREE_RCU(lock, sizeof(*lock), &lock->l_handle); - } -} -EXPORT_SYMBOL(ldlm_lock_put); - -/** - * Removes LDLM lock \a lock from LRU. Assumes LRU is already locked. - */ -int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock) -{ - int rc = 0; - - if (!list_empty(&lock->l_lru)) { - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); - list_del_init(&lock->l_lru); - LASSERT(ns->ns_nr_unused > 0); - ns->ns_nr_unused--; - rc = 1; - } - return rc; -} - -/** - * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first. - * - * If \a last_use is non-zero, it will remove the lock from LRU only if - * it matches lock's l_last_used. - * - * \retval 0 if \a last_use is set, the lock is not in LRU list or \a last_use - * doesn't match lock's l_last_used; - * otherwise, the lock hasn't been in the LRU list. - * \retval 1 the lock was in LRU list and removed. - */ -int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - int rc = 0; - - spin_lock(&ns->ns_lock); - if (last_use == 0 || last_use == lock->l_last_used) - rc = ldlm_lock_remove_from_lru_nolock(lock); - spin_unlock(&ns->ns_lock); - - return rc; -} - -/** - * Adds LDLM lock \a lock to namespace LRU. Assumes LRU is already locked. - */ -static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - lock->l_last_used = cfs_time_current(); - LASSERT(list_empty(&lock->l_lru)); - LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); - list_add_tail(&lock->l_lru, &ns->ns_unused_list); - ldlm_clear_skipped(lock); - LASSERT(ns->ns_nr_unused >= 0); - ns->ns_nr_unused++; -} - -/** - * Adds LDLM lock \a lock to namespace LRU. Obtains necessary LRU locks - * first. - */ -static void ldlm_lock_add_to_lru(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - spin_lock(&ns->ns_lock); - ldlm_lock_add_to_lru_nolock(lock); - spin_unlock(&ns->ns_lock); -} - -/** - * Moves LDLM lock \a lock that is already in namespace LRU to the tail of - * the LRU. 
Performs necessary LRU locking - */ -static void ldlm_lock_touch_in_lru(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - spin_lock(&ns->ns_lock); - if (!list_empty(&lock->l_lru)) { - ldlm_lock_remove_from_lru_nolock(lock); - ldlm_lock_add_to_lru_nolock(lock); - } - spin_unlock(&ns->ns_lock); -} - -/** - * Helper to destroy a locked lock. - * - * Used by ldlm_lock_destroy and ldlm_lock_destroy_nolock - * Must be called with l_lock and lr_lock held. - * - * Does not actually free the lock data, but rather marks the lock as - * destroyed by setting l_destroyed field in the lock to 1. Destroys a - * handle->lock association too, so that the lock can no longer be found - * and removes the lock from LRU list. Actual lock freeing occurs when - * last lock reference goes away. - * - * Original comment (of some historical value): - * This used to have a 'strict' flag, which recovery would use to mark an - * in-use lock as needing-to-die. Lest I am ever tempted to put it back, I - * shall explain why it's gone: with the new hash table scheme, once you call - * ldlm_lock_destroy, you can never drop your final references on this lock. - * Because it's not in the hash table anymore. -phil - */ -static int ldlm_lock_destroy_internal(struct ldlm_lock *lock) -{ - if (lock->l_readers || lock->l_writers) { - LDLM_ERROR(lock, "lock still has references"); - LBUG(); - } - - if (!list_empty(&lock->l_res_link)) { - LDLM_ERROR(lock, "lock still on resource"); - LBUG(); - } - - if (ldlm_is_destroyed(lock)) { - LASSERT(list_empty(&lock->l_lru)); - return 0; - } - ldlm_set_destroyed(lock); - - if (lock->l_export && lock->l_export->exp_lock_hash) { - /* NB: it's safe to call cfs_hash_del() even lock isn't - * in exp_lock_hash. - */ - /* In the function below, .hs_keycmp resolves to - * ldlm_export_lock_keycmp() - */ - /* coverity[overrun-buffer-val] */ - cfs_hash_del(lock->l_export->exp_lock_hash, - &lock->l_remote_handle, &lock->l_exp_hash); - } - - ldlm_lock_remove_from_lru(lock); - class_handle_unhash(&lock->l_handle); - - return 1; -} - -/** - * Destroys a LDLM lock \a lock. Performs necessary locking first. - */ -static void ldlm_lock_destroy(struct ldlm_lock *lock) -{ - int first; - - lock_res_and_lock(lock); - first = ldlm_lock_destroy_internal(lock); - unlock_res_and_lock(lock); - - /* drop reference from hashtable only for first destroy */ - if (first) { - lu_ref_del(&lock->l_reference, "hash", lock); - LDLM_LOCK_RELEASE(lock); - } -} - -/** - * Destroys a LDLM lock \a lock that is already locked. - */ -void ldlm_lock_destroy_nolock(struct ldlm_lock *lock) -{ - int first; - - first = ldlm_lock_destroy_internal(lock); - /* drop reference from hashtable only for first destroy */ - if (first) { - lu_ref_del(&lock->l_reference, "hash", lock); - LDLM_LOCK_RELEASE(lock); - } -} - -/* this is called by portals_handle2object with the handle lock taken */ -static void lock_handle_addref(void *lock) -{ - LDLM_LOCK_GET((struct ldlm_lock *)lock); -} - -static void lock_handle_free(void *lock, int size) -{ - LASSERT(size == sizeof(struct ldlm_lock)); - kmem_cache_free(ldlm_lock_slab, lock); -} - -static struct portals_handle_ops lock_handle_ops = { - .hop_addref = lock_handle_addref, - .hop_free = lock_handle_free, -}; - -/** - * - * Allocate and initialize new lock structure. - * - * usage: pass in a resource on which you have done ldlm_resource_get - * new lock will take over the refcount. 
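
A short sketch of that handoff, using identifiers as they appear elsewhere in this file (error handling elided):

        struct ldlm_resource *res;
        struct ldlm_lock *lock;

        res = ldlm_resource_get(ns, NULL, &res_id, LDLM_PLAIN, 1);
        if (IS_ERR(res))
                return PTR_ERR(res);
        lock = ldlm_lock_new(res);      /* on success the lock owns the res ref */
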
- * returns: lock with refcount 2 - one for current caller and one for remote - */ -static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource) -{ - struct ldlm_lock *lock; - - LASSERT(resource); - - lock = kmem_cache_zalloc(ldlm_lock_slab, GFP_NOFS); - if (!lock) - return NULL; - - spin_lock_init(&lock->l_lock); - lock->l_resource = resource; - lu_ref_add(&resource->lr_reference, "lock", lock); - - atomic_set(&lock->l_refc, 2); - INIT_LIST_HEAD(&lock->l_res_link); - INIT_LIST_HEAD(&lock->l_lru); - INIT_LIST_HEAD(&lock->l_pending_chain); - INIT_LIST_HEAD(&lock->l_bl_ast); - INIT_LIST_HEAD(&lock->l_cp_ast); - INIT_LIST_HEAD(&lock->l_rk_ast); - init_waitqueue_head(&lock->l_waitq); - lock->l_blocking_lock = NULL; - INIT_LIST_HEAD(&lock->l_sl_mode); - INIT_LIST_HEAD(&lock->l_sl_policy); - INIT_HLIST_NODE(&lock->l_exp_hash); - INIT_HLIST_NODE(&lock->l_exp_flock_hash); - - lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats, - LDLM_NSS_LOCKS); - INIT_LIST_HEAD(&lock->l_handle.h_link); - class_handle_hash(&lock->l_handle, &lock_handle_ops); - - lu_ref_init(&lock->l_reference); - lu_ref_add(&lock->l_reference, "hash", lock); - lock->l_callback_timeout = 0; - -#if LUSTRE_TRACKS_LOCK_EXP_REFS - INIT_LIST_HEAD(&lock->l_exp_refs_link); - lock->l_exp_refs_nr = 0; - lock->l_exp_refs_target = NULL; -#endif - - return lock; -} - -/** - * Moves LDLM lock \a lock to another resource. - * This is used on client when server returns some other lock than requested - * (typically as a result of intent operation) - */ -int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, - const struct ldlm_res_id *new_resid) -{ - struct ldlm_resource *oldres = lock->l_resource; - struct ldlm_resource *newres; - int type; - - lock_res_and_lock(lock); - if (memcmp(new_resid, &lock->l_resource->lr_name, - sizeof(lock->l_resource->lr_name)) == 0) { - /* Nothing to do */ - unlock_res_and_lock(lock); - return 0; - } - - LASSERT(new_resid->name[0] != 0); - - /* This function assumes that the lock isn't on any lists */ - LASSERT(list_empty(&lock->l_res_link)); - - type = oldres->lr_type; - unlock_res_and_lock(lock); - - newres = ldlm_resource_get(ns, NULL, new_resid, type, 1); - if (IS_ERR(newres)) - return PTR_ERR(newres); - - lu_ref_add(&newres->lr_reference, "lock", lock); - /* - * To flip the lock from the old to the new resource, lock, oldres and - * newres have to be locked. Resource spin-locks are nested within - * lock->l_lock, and are taken in the memory address order to avoid - * dead-locks. - */ - spin_lock(&lock->l_lock); - oldres = lock->l_resource; - if (oldres < newres) { - lock_res(oldres); - lock_res_nested(newres, LRT_NEW); - } else { - lock_res(newres); - lock_res_nested(oldres, LRT_NEW); - } - LASSERT(memcmp(new_resid, &oldres->lr_name, - sizeof(oldres->lr_name)) != 0); - lock->l_resource = newres; - unlock_res(oldres); - unlock_res_and_lock(lock); - - /* ...and the flowers are still standing! */ - lu_ref_del(&oldres->lr_reference, "lock", lock); - ldlm_resource_putref(oldres); - - return 0; -} - -/** \defgroup ldlm_handles LDLM HANDLES - * Ways to get hold of locks without any addresses. - * @{ - */ - -/** - * Fills in handle for LDLM lock \a lock into supplied \a lockh - * Does not take any references. - */ -void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh) -{ - lockh->cookie = lock->l_handle.h_cookie; -} -EXPORT_SYMBOL(ldlm_lock2handle); - -/** - * Obtain a lock reference by handle. 
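
Handles decouple callers from lock lifetimes; a minimal sketch of the round trip, assuming ldlm_handle2lock() is the flags=0 wrapper around __ldlm_handle2lock() that callers below use:

        struct lustre_handle lockh;
        struct ldlm_lock *l;

        ldlm_lock2handle(lock, &lockh); /* records the cookie, no reference */
        l = ldlm_handle2lock(&lockh);   /* referenced lock, or NULL if gone */
        if (l)
                LDLM_LOCK_PUT(l);       /* drop the reference taken above */
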
- * - * if \a flags: atomically get the lock and set the flags. - * Return NULL if flag already set - */ -struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, - __u64 flags) -{ - struct ldlm_lock *lock; - - LASSERT(handle); - - lock = class_handle2object(handle->cookie, NULL); - if (!lock) - return NULL; - - if (lock->l_export && lock->l_export->exp_failed) { - CDEBUG(D_INFO, "lock export failed: lock %p, exp %p\n", - lock, lock->l_export); - LDLM_LOCK_PUT(lock); - return NULL; - } - - /* It's unlikely but possible that someone marked the lock as - * destroyed after we did handle2object on it - */ - if (flags == 0 && !ldlm_is_destroyed(lock)) { - lu_ref_add(&lock->l_reference, "handle", current); - return lock; - } - - lock_res_and_lock(lock); - - LASSERT(lock->l_resource); - - lu_ref_add_atomic(&lock->l_reference, "handle", current); - if (unlikely(ldlm_is_destroyed(lock))) { - unlock_res_and_lock(lock); - CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock); - LDLM_LOCK_PUT(lock); - return NULL; - } - - if (flags) { - if (lock->l_flags & flags) { - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - return NULL; - } - - lock->l_flags |= flags; - } - - unlock_res_and_lock(lock); - return lock; -} -EXPORT_SYMBOL(__ldlm_handle2lock); -/** @} ldlm_handles */ - -/** - * Fill in "on the wire" representation for given LDLM lock into supplied - * lock descriptor \a desc structure. - */ -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc) -{ - ldlm_res2desc(lock->l_resource, &desc->l_resource); - desc->l_req_mode = lock->l_req_mode; - desc->l_granted_mode = lock->l_granted_mode; - ldlm_convert_policy_to_wire(lock->l_resource->lr_type, - &lock->l_policy_data, - &desc->l_policy_data); -} - -/** - * Add a lock to list of conflicting locks to send AST to. - * - * Only add if we have not sent a blocking AST to the lock yet. - */ -static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, - struct list_head *work_list) -{ - if (!ldlm_is_ast_sent(lock)) { - LDLM_DEBUG(lock, "lock incompatible; sending blocking AST."); - ldlm_set_ast_sent(lock); - /* If the enqueuing client said so, tell the AST recipient to - * discard dirty data, rather than writing back. - */ - if (ldlm_is_ast_discard_data(new)) - ldlm_set_discard_data(lock); - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, work_list); - LDLM_LOCK_GET(lock); - LASSERT(!lock->l_blocking_lock); - lock->l_blocking_lock = LDLM_LOCK_GET(new); - } -} - -/** - * Add a lock to list of just granted locks to send completion AST to. - */ -static void ldlm_add_cp_work_item(struct ldlm_lock *lock, - struct list_head *work_list) -{ - if (!ldlm_is_cp_reqd(lock)) { - ldlm_set_cp_reqd(lock); - LDLM_DEBUG(lock, "lock granted; sending completion AST."); - LASSERT(list_empty(&lock->l_cp_ast)); - list_add(&lock->l_cp_ast, work_list); - LDLM_LOCK_GET(lock); - } -} - -/** - * Aggregator function to add AST work items into a list. Determines - * what sort of an AST work needs to be done and calls the proper - * adding function. - * Must be called with lr_lock held. - */ -static void ldlm_add_ast_work_item(struct ldlm_lock *lock, - struct ldlm_lock *new, - struct list_head *work_list) -{ - check_res_locked(lock->l_resource); - if (new) - ldlm_add_bl_work_item(lock, new, work_list); - else - ldlm_add_cp_work_item(lock, work_list); -} - -/** - * Add specified reader/writer reference to LDLM lock with handle \a lockh. 
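
The usual caller-side pairing, sketched (as the code below shows, LCK_NL/LCK_CR/LCK_PR count against l_readers, the exclusive modes against l_writers):

        ldlm_lock_addref(&lockh, LCK_PR);
        /* ... access the data the lock protects ... */
        ldlm_lock_decref(&lockh, LCK_PR);       /* last decref may LRU the lock */
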
- * r/w reference type is determined by \a mode - * Calls ldlm_lock_addref_internal. - */ -void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode) -{ - struct ldlm_lock *lock; - - lock = ldlm_handle2lock(lockh); - LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie); - ldlm_lock_addref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_addref); - -/** - * Helper function. - * Add specified reader/writer reference to LDLM lock \a lock. - * r/w reference type is determined by \a mode - * Removes lock from LRU if it is there. - * Assumes the LDLM lock is already locked. - */ -void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode) -{ - ldlm_lock_remove_from_lru(lock); - if (mode & (LCK_NL | LCK_CR | LCK_PR)) { - lock->l_readers++; - lu_ref_add_atomic(&lock->l_reference, "reader", lock); - } - if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) { - lock->l_writers++; - lu_ref_add_atomic(&lock->l_reference, "writer", lock); - } - LDLM_LOCK_GET(lock); - lu_ref_add_atomic(&lock->l_reference, "user", lock); - LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]); -} - -/** - * Attempts to add reader/writer reference to a lock with handle \a lockh, and - * fails if lock is already LDLM_FL_CBPENDING or destroyed. - * - * \retval 0 success, lock was addref-ed - * - * \retval -EAGAIN lock is being canceled. - */ -int ldlm_lock_addref_try(const struct lustre_handle *lockh, enum ldlm_mode mode) -{ - struct ldlm_lock *lock; - int result; - - result = -EAGAIN; - lock = ldlm_handle2lock(lockh); - if (lock) { - lock_res_and_lock(lock); - if (lock->l_readers != 0 || lock->l_writers != 0 || - !ldlm_is_cbpending(lock)) { - ldlm_lock_addref_internal_nolock(lock, mode); - result = 0; - } - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - } - return result; -} -EXPORT_SYMBOL(ldlm_lock_addref_try); - -/** - * Add specified reader/writer reference to LDLM lock \a lock. - * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work. - * Only called for local locks. - */ -void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) -{ - lock_res_and_lock(lock); - ldlm_lock_addref_internal_nolock(lock, mode); - unlock_res_and_lock(lock); -} - -/** - * Removes reader/writer reference for LDLM lock \a lock. - * Assumes LDLM lock is already locked. - * only called in ldlm_flock_destroy and for local locks. - * Does NOT add lock to LRU if no r/w references left to accommodate flock locks - * that cannot be placed in LRU. - */ -void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode) -{ - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); - if (mode & (LCK_NL | LCK_CR | LCK_PR)) { - LASSERT(lock->l_readers > 0); - lu_ref_del(&lock->l_reference, "reader", lock); - lock->l_readers--; - } - if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) { - LASSERT(lock->l_writers > 0); - lu_ref_del(&lock->l_reference, "writer", lock); - lock->l_writers--; - } - - lu_ref_del(&lock->l_reference, "user", lock); - LDLM_LOCK_RELEASE(lock); /* matches the LDLM_LOCK_GET() in addref */ -} - -/** - * Removes reader/writer reference for LDLM lock \a lock. - * Locks LDLM lock first. - * If the lock is determined to be client lock on a client and r/w refcount - * drops to zero and the lock is not blocked, the lock is added to LRU lock - * on the namespace. - * For blocked LDLM locks if r/w count drops to zero, blocking_ast is called. 
- */ -void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) -{ - struct ldlm_namespace *ns; - - lock_res_and_lock(lock); - - ns = ldlm_lock_to_ns(lock); - - ldlm_lock_decref_internal_nolock(lock, mode); - - if ((ldlm_is_local(lock) || lock->l_req_mode == LCK_GROUP) && - !lock->l_readers && !lock->l_writers) { - /* If this is a local lock on a server namespace and this was - * the last reference, cancel the lock. - * - * Group locks are special: - * They must not go in LRU, but they are not called back - * like non-group locks, instead they are manually released. - * They have an l_writers reference which they keep until - * they are manually released, so we remove them when they have - * no more reader or writer references. - LU-6368 - */ - ldlm_set_cbpending(lock); - } - - if (!lock->l_readers && !lock->l_writers && ldlm_is_cbpending(lock)) { - /* If we received a blocked AST and this was the last reference, - * run the callback. - */ - LDLM_DEBUG(lock, "final decref done on cbpending lock"); - - LDLM_LOCK_GET(lock); /* dropped by bl thread */ - ldlm_lock_remove_from_lru(lock); - unlock_res_and_lock(lock); - - if (ldlm_is_fail_loc(lock)) - OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - - if (ldlm_is_atomic_cb(lock) || - ldlm_bl_to_thread_lock(ns, NULL, lock) != 0) - ldlm_handle_bl_callback(ns, NULL, lock); - } else if (!lock->l_readers && !lock->l_writers && - !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) { - LDLM_DEBUG(lock, "add lock into lru list"); - - /* If this is a client-side namespace and this was the last - * reference, put it on the LRU. - */ - ldlm_lock_add_to_lru(lock); - unlock_res_and_lock(lock); - - if (ldlm_is_fail_loc(lock)) - OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - - /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE - * are not supported by the server, otherwise, it is done on - * enqueue. - */ - if (!exp_connect_cancelset(lock->l_conn_export) && - !ns_connect_lru_resize(ns)) - ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0); - } else { - LDLM_DEBUG(lock, "do not add lock into lru list"); - unlock_res_and_lock(lock); - } -} - -/** - * Decrease reader/writer refcount for LDLM lock with handle \a lockh - */ -void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode) -{ - struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); - - LASSERTF(lock, "Non-existing lock: %#llx\n", lockh->cookie); - ldlm_lock_decref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_decref); - -/** - * Decrease reader/writer refcount for LDLM lock with handle - * \a lockh and mark it for subsequent cancellation once r/w refcount - * drops to zero instead of putting into LRU. - */ -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, - enum ldlm_mode mode) -{ - struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); - - LASSERT(lock); - - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); - lock_res_and_lock(lock); - ldlm_set_cbpending(lock); - unlock_res_and_lock(lock); - ldlm_lock_decref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_decref_and_cancel); - -struct sl_insert_point { - struct list_head *res_link; - struct list_head *mode_link; - struct list_head *policy_link; -}; - -/** - * Finds a position to insert the new lock into granted lock list. - * - * Used for locks eligible for skiplist optimization. 
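
The granted-list shape this helper maintains, sketched (PLAIN locks group by mode only, IBITS locks also group by inodebits policy):

        /*
         *  l_res_link:   L1 -- L2 -- L3 -- L4 -- L5
         *  l_sl_mode:    [   PR group   ]  [PW group]
         *  l_sl_policy:  [bits A][bits B]
         *
         * The l_sl_* links let a search skip a whole mode or policy group
         * per step instead of visiting every lock on the resource.
         */
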
- * - * Parameters: - * queue [input]: the granted list where search acts on; - * req [input]: the lock whose position to be located; - * prev [output]: positions within 3 lists to insert @req to - * Return Value: - * filled @prev - * NOTE: called by - * - ldlm_grant_lock_with_skiplist - */ -static void search_granted_lock(struct list_head *queue, - struct ldlm_lock *req, - struct sl_insert_point *prev) -{ - struct ldlm_lock *lock, *mode_end, *policy_end; - - list_for_each_entry(lock, queue, l_res_link) { - - mode_end = list_prev_entry(lock, l_sl_mode); - - if (lock->l_req_mode != req->l_req_mode) { - /* jump to last lock of mode group */ - lock = mode_end; - continue; - } - - /* suitable mode group is found */ - if (lock->l_resource->lr_type == LDLM_PLAIN) { - /* insert point is last lock of the mode group */ - prev->res_link = &mode_end->l_res_link; - prev->mode_link = &mode_end->l_sl_mode; - prev->policy_link = &req->l_sl_policy; - return; - } - - if (lock->l_resource->lr_type == LDLM_IBITS) { - for (;;) { - policy_end = - list_prev_entry(lock, l_sl_policy); - - if (lock->l_policy_data.l_inodebits.bits == - req->l_policy_data.l_inodebits.bits) { - /* insert point is last lock of - * the policy group - */ - prev->res_link = - &policy_end->l_res_link; - prev->mode_link = - &policy_end->l_sl_mode; - prev->policy_link = - &policy_end->l_sl_policy; - return; - } - - if (policy_end == mode_end) - /* done with mode group */ - break; - - /* go to next policy group within mode group */ - lock = list_next_entry(policy_end, l_res_link); - } /* loop over policy groups within the mode group */ - - /* insert point is last lock of the mode group, - * new policy group is started - */ - prev->res_link = &mode_end->l_res_link; - prev->mode_link = &mode_end->l_sl_mode; - prev->policy_link = &req->l_sl_policy; - return; - } - - LDLM_ERROR(lock, "is not LDLM_PLAIN or LDLM_IBITS lock"); - LBUG(); - } - - /* insert point is last lock on the queue, - * new mode group and new policy group are started - */ - prev->res_link = queue->prev; - prev->mode_link = &req->l_sl_mode; - prev->policy_link = &req->l_sl_policy; -} - -/** - * Add a lock into resource granted list after a position described by - * \a prev. - */ -static void ldlm_granted_list_add_lock(struct ldlm_lock *lock, - struct sl_insert_point *prev) -{ - struct ldlm_resource *res = lock->l_resource; - - check_res_locked(res); - - ldlm_resource_dump(D_INFO, res); - LDLM_DEBUG(lock, "About to add lock:"); - - if (ldlm_is_destroyed(lock)) { - CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); - return; - } - - LASSERT(list_empty(&lock->l_res_link)); - LASSERT(list_empty(&lock->l_sl_mode)); - LASSERT(list_empty(&lock->l_sl_policy)); - - /* - * lock->link == prev->link means lock is first starting the group. - * Don't re-add to itself to suppress kernel warnings. - */ - if (&lock->l_res_link != prev->res_link) - list_add(&lock->l_res_link, prev->res_link); - if (&lock->l_sl_mode != prev->mode_link) - list_add(&lock->l_sl_mode, prev->mode_link); - if (&lock->l_sl_policy != prev->policy_link) - list_add(&lock->l_sl_policy, prev->policy_link); -} - -/** - * Add a lock to granted list on a resource maintaining skiplist - * correctness. - */ -static void ldlm_grant_lock_with_skiplist(struct ldlm_lock *lock) -{ - struct sl_insert_point prev; - - LASSERT(lock->l_req_mode == lock->l_granted_mode); - - search_granted_lock(&lock->l_resource->lr_granted, lock, &prev); - ldlm_granted_list_add_lock(lock, &prev); -} - -/** - * Perform lock granting bookkeeping. 
- * - * Includes putting the lock into granted list and updating lock mode. - * NOTE: called by - * - ldlm_lock_enqueue - * - ldlm_reprocess_queue - * - ldlm_lock_convert - * - * must be called with lr_lock held - */ -void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list) -{ - struct ldlm_resource *res = lock->l_resource; - - check_res_locked(res); - - lock->l_granted_mode = lock->l_req_mode; - - if (work_list && lock->l_completion_ast) - ldlm_add_ast_work_item(lock, NULL, work_list); - - if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) { - ldlm_grant_lock_with_skiplist(lock); - } else if (res->lr_type == LDLM_EXTENT) { - ldlm_extent_add_lock(res, lock); - } else if (res->lr_type == LDLM_FLOCK) { - /* - * We should not add locks to granted list in - * the following cases: - * - this is an UNLOCK but not a real lock; - * - this is a TEST lock; - * - this is a F_CANCELLK lock (async flock has req_mode == 0) - * - this is a deadlock (flock cannot be granted) - */ - if (!lock->l_req_mode || lock->l_req_mode == LCK_NL || - ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock)) - return; - ldlm_resource_add_lock(res, &res->lr_granted, lock); - } else { - LBUG(); - } - - ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock); -} - -/** - * Describe the overlap between two locks. itree_overlap_cb data. - */ -struct lock_match_data { - struct ldlm_lock *lmd_old; - struct ldlm_lock *lmd_lock; - enum ldlm_mode *lmd_mode; - union ldlm_policy_data *lmd_policy; - __u64 lmd_flags; - int lmd_unref; -}; - -/** - * Check if the given @lock meets the criteria for a match. - * A reference on the lock is taken if matched. - * - * \param lock test-against this lock - * \param data parameters - */ -static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data) -{ - union ldlm_policy_data *lpol = &lock->l_policy_data; - enum ldlm_mode match; - - if (lock == data->lmd_old) - return INTERVAL_ITER_STOP; - - /* - * Check if this lock can be matched. - * Used by LU-2919(exclusive open) for open lease lock - */ - if (ldlm_is_excl(lock)) - return INTERVAL_ITER_CONT; - - /* - * llite sometimes wants to match locks that will be - * canceled when their users drop, but we allow it to match - * if it passes in CBPENDING and the lock still has users. - * this is generally only going to be used by children - * whose parents already hold a lock so forward progress - * can still happen. - */ - if (ldlm_is_cbpending(lock) && - !(data->lmd_flags & LDLM_FL_CBPENDING)) - return INTERVAL_ITER_CONT; - - if (!data->lmd_unref && ldlm_is_cbpending(lock) && - !lock->l_readers && !lock->l_writers) - return INTERVAL_ITER_CONT; - - if (!(lock->l_req_mode & *data->lmd_mode)) - return INTERVAL_ITER_CONT; - match = lock->l_req_mode; - - switch (lock->l_resource->lr_type) { - case LDLM_EXTENT: - if (lpol->l_extent.start > data->lmd_policy->l_extent.start || - lpol->l_extent.end < data->lmd_policy->l_extent.end) - return INTERVAL_ITER_CONT; - - if (unlikely(match == LCK_GROUP) && - data->lmd_policy->l_extent.gid != LDLM_GID_ANY && - lpol->l_extent.gid != data->lmd_policy->l_extent.gid) - return INTERVAL_ITER_CONT; - break; - case LDLM_IBITS: - /* - * We match if we have existing lock with same or wider set - * of bits. - */ - if ((lpol->l_inodebits.bits & - data->lmd_policy->l_inodebits.bits) != - data->lmd_policy->l_inodebits.bits) - return INTERVAL_ITER_CONT; - break; - default: - break; - } - /* - * We match if we have existing lock with same or wider set - * of bits. 
- */ - if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE)) - return INTERVAL_ITER_CONT; - - if (!equi(data->lmd_flags & LDLM_FL_LOCAL_ONLY, ldlm_is_local(lock))) - return INTERVAL_ITER_CONT; - - if (data->lmd_flags & LDLM_FL_TEST_LOCK) { - LDLM_LOCK_GET(lock); - ldlm_lock_touch_in_lru(lock); - } else { - ldlm_lock_addref_internal_nolock(lock, match); - } - - *data->lmd_mode = match; - data->lmd_lock = lock; - - return INTERVAL_ITER_STOP; -} - -static enum interval_iter itree_overlap_cb(struct interval_node *in, void *args) -{ - struct ldlm_interval *node = to_ldlm_interval(in); - struct lock_match_data *data = args; - struct ldlm_lock *lock; - int rc; - - list_for_each_entry(lock, &node->li_group, l_sl_policy) { - rc = lock_matches(lock, data); - if (rc == INTERVAL_ITER_STOP) - return INTERVAL_ITER_STOP; - } - return INTERVAL_ITER_CONT; -} - -/** - * Search for a lock with given parameters in interval trees. - * - * \param res search for a lock in this resource - * \param data parameters - * - * \retval a referenced lock or NULL. - */ -static struct ldlm_lock *search_itree(struct ldlm_resource *res, - struct lock_match_data *data) -{ - struct interval_node_extent ext = { - .start = data->lmd_policy->l_extent.start, - .end = data->lmd_policy->l_extent.end - }; - int idx; - - for (idx = 0; idx < LCK_MODE_NUM; idx++) { - struct ldlm_interval_tree *tree = &res->lr_itree[idx]; - - if (!tree->lit_root) - continue; - - if (!(tree->lit_mode & *data->lmd_mode)) - continue; - - interval_search(tree->lit_root, &ext, - itree_overlap_cb, data); - } - return data->lmd_lock; -} - -/** - * Search for a lock with given properties in a queue. - * - * \param queue search for a lock in this queue - * \param data parameters - * - * \retval a referenced lock or NULL. - */ -static struct ldlm_lock *search_queue(struct list_head *queue, - struct lock_match_data *data) -{ - struct ldlm_lock *lock; - int rc; - - list_for_each_entry(lock, queue, l_res_link) { - rc = lock_matches(lock, data); - if (rc == INTERVAL_ITER_STOP) - return data->lmd_lock; - } - return NULL; -} - -void ldlm_lock_fail_match_locked(struct ldlm_lock *lock) -{ - if ((lock->l_flags & LDLM_FL_FAIL_NOTIFIED) == 0) { - lock->l_flags |= LDLM_FL_FAIL_NOTIFIED; - wake_up_all(&lock->l_waitq); - } -} - -/** - * Mark lock as "matchable" by OST. - * - * Used to prevent certain races in LOV/OSC where the lock is granted, but LVB - * is not yet valid. - * Assumes LDLM lock is already locked. - */ -void ldlm_lock_allow_match_locked(struct ldlm_lock *lock) -{ - ldlm_set_lvb_ready(lock); - wake_up_all(&lock->l_waitq); -} -EXPORT_SYMBOL(ldlm_lock_allow_match_locked); - -/** - * Mark lock as "matchable" by OST. - * Locks the lock and then \see ldlm_lock_allow_match_locked - */ -void ldlm_lock_allow_match(struct ldlm_lock *lock) -{ - lock_res_and_lock(lock); - ldlm_lock_allow_match_locked(lock); - unlock_res_and_lock(lock); -} -EXPORT_SYMBOL(ldlm_lock_allow_match); - -/** - * Attempt to find a lock with specified properties. - * - * Typically returns a reference to matched lock unless LDLM_FL_TEST_LOCK is - * set in \a flags - * - * Can be called in two ways: - * - * If 'ns' is NULL, then lockh describes an existing lock that we want to look - * for a duplicate of. - * - * Otherwise, all of the fields must be filled in, to match against. 
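
A typical client-side probe, sketched; the resource id and extent values are hypothetical, and the flag semantics are spelled out just below:

        union ldlm_policy_data policy = {
                .l_extent = { .start = 0, .end = OBD_OBJECT_EOF },
        };
        struct lustre_handle lockh;
        enum ldlm_mode mode;

        mode = ldlm_lock_match(ns, LDLM_FL_LVB_READY, &res_id, LDLM_EXTENT,
                               &policy, LCK_PR | LCK_PW, &lockh, 0);
        if (mode)
                ldlm_lock_decref(&lockh, mode); /* drop the match reference */
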
- * - * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the - * server (i.e., connh is NULL) - * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted - * list will be considered - * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked - * to be canceled can still be matched as long as they still have reader - * or writer references - * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock, - * just tell us if we would have matched. - * - * \retval 1 if it finds an already-existing lock that is compatible; in this - * case, lockh is filled in with an addref()ed lock - * - * We also check security context, and if that fails we simply return 0 (to - * keep caller code unchanged); the context failure will be discovered by - * caller sometime later. - */ -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *res_id, - enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh, int unref) -{ - struct lock_match_data data = { - .lmd_old = NULL, - .lmd_lock = NULL, - .lmd_mode = &mode, - .lmd_policy = policy, - .lmd_flags = flags, - .lmd_unref = unref, - }; - struct ldlm_resource *res; - struct ldlm_lock *lock; - int rc = 0; - - if (!ns) { - data.lmd_old = ldlm_handle2lock(lockh); - LASSERT(data.lmd_old); - - ns = ldlm_lock_to_ns(data.lmd_old); - res_id = &data.lmd_old->l_resource->lr_name; - type = data.lmd_old->l_resource->lr_type; - *data.lmd_mode = data.lmd_old->l_req_mode; - } - - res = ldlm_resource_get(ns, NULL, res_id, type, 0); - if (IS_ERR(res)) { - LASSERT(!data.lmd_old); - return 0; - } - - LDLM_RESOURCE_ADDREF(res); - lock_res(res); - - if (res->lr_type == LDLM_EXTENT) - lock = search_itree(res, &data); - else - lock = search_queue(&res->lr_granted, &data); - if (lock) { - rc = 1; - goto out; - } - if (flags & LDLM_FL_BLOCK_GRANTED) { - rc = 0; - goto out; - } - lock = search_queue(&res->lr_waiting, &data); - if (lock) { - rc = 1; - goto out; - } -out: - unlock_res(res); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - - if (lock) { - ldlm_lock2handle(lock, lockh); - if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) { - __u64 wait_flags = LDLM_FL_LVB_READY | - LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED; - - if (lock->l_completion_ast) { - int err = lock->l_completion_ast(lock, - LDLM_FL_WAIT_NOREPROC, - NULL); - if (err) { - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - else - ldlm_lock_decref_internal(lock, - mode); - rc = 0; - goto out2; - } - } - - /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */ - wait_event_idle_timeout(lock->l_waitq, - lock->l_flags & wait_flags, - obd_timeout * HZ); - if (!ldlm_is_lvb_ready(lock)) { - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - else - ldlm_lock_decref_internal(lock, mode); - rc = 0; - } - } - } - out2: - if (rc) { - LDLM_DEBUG(lock, "matched (%llu %llu)", - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[2] : policy->l_extent.start, - (type == LDLM_PLAIN || type == LDLM_IBITS) ? 
- res_id->name[3] : policy->l_extent.end); - - /* check user's security context */ - if (lock->l_conn_export && - sptlrpc_import_check_ctx( - class_exp2cliimp(lock->l_conn_export))) { - if (!(flags & LDLM_FL_TEST_LOCK)) - ldlm_lock_decref_internal(lock, mode); - rc = 0; - } - - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - - } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/ - LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res %llu/%llu (%llu %llu)", - ns, type, mode, res_id->name[0], - res_id->name[1], - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[2] : policy->l_extent.start, - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[3] : policy->l_extent.end); - } - if (data.lmd_old) - LDLM_LOCK_PUT(data.lmd_old); - - return rc ? mode : 0; -} -EXPORT_SYMBOL(ldlm_lock_match); - -enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, - __u64 *bits) -{ - struct ldlm_lock *lock; - enum ldlm_mode mode = 0; - - lock = ldlm_handle2lock(lockh); - if (lock) { - lock_res_and_lock(lock); - if (LDLM_HAVE_MASK(lock, GONE)) - goto out; - - if (ldlm_is_cbpending(lock) && - lock->l_readers == 0 && lock->l_writers == 0) - goto out; - - if (bits) - *bits = lock->l_policy_data.l_inodebits.bits; - mode = lock->l_granted_mode; - ldlm_lock_addref_internal_nolock(lock, mode); - } - -out: - if (lock) { - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - } - return mode; -} -EXPORT_SYMBOL(ldlm_revalidate_lock_handle); - -/** The caller must guarantee that the buffer is large enough. */ -int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, - enum req_location loc, void *data, int size) -{ - void *lvb; - - LASSERT(data); - LASSERT(size >= 0); - - switch (lock->l_lvb_type) { - case LVB_T_OST: - if (size == sizeof(struct ost_lvb)) { - if (loc == RCL_CLIENT) - lvb = req_capsule_client_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_ost_lvb); - else - lvb = req_capsule_server_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_ost_lvb); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - } else if (size == sizeof(struct ost_lvb_v1)) { - struct ost_lvb *olvb = data; - - if (loc == RCL_CLIENT) - lvb = req_capsule_client_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_ost_lvb_v1); - else - lvb = req_capsule_server_sized_swab_get(pill, - &RMF_DLM_LVB, size, - lustre_swab_ost_lvb_v1); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - olvb->lvb_mtime_ns = 0; - olvb->lvb_atime_ns = 0; - olvb->lvb_ctime_ns = 0; - } else { - LDLM_ERROR(lock, "Replied unexpected ost LVB size %d", - size); - return -EINVAL; - } - break; - case LVB_T_LQUOTA: - if (size == sizeof(struct lquota_lvb)) { - if (loc == RCL_CLIENT) - lvb = req_capsule_client_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_lquota_lvb); - else - lvb = req_capsule_server_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_lquota_lvb); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - } else { - LDLM_ERROR(lock, - "Replied unexpected lquota LVB size %d", - size); - return -EINVAL; - } - break; - case LVB_T_LAYOUT: - if (size == 0) - break; - - if (loc == RCL_CLIENT) - lvb = req_capsule_client_get(pill, &RMF_DLM_LVB); - else - lvb = req_capsule_server_get(pill, &RMF_DLM_LVB); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - break; - default: - LDLM_ERROR(lock, "Unknown LVB 
type: %d", lock->l_lvb_type); - dump_stack(); - return -EINVAL; - } - - return 0; -} - -/** - * Create and fill in new LDLM lock with specified properties. - * Returns a referenced lock - */ -struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - enum ldlm_type type, - enum ldlm_mode mode, - const struct ldlm_callback_suite *cbs, - void *data, __u32 lvb_len, - enum lvb_type lvb_type) -{ - struct ldlm_lock *lock; - struct ldlm_resource *res; - int rc; - - res = ldlm_resource_get(ns, NULL, res_id, type, 1); - if (IS_ERR(res)) - return ERR_CAST(res); - - lock = ldlm_lock_new(res); - if (!lock) - return ERR_PTR(-ENOMEM); - - lock->l_req_mode = mode; - lock->l_ast_data = data; - lock->l_pid = current_pid(); - if (cbs) { - lock->l_blocking_ast = cbs->lcs_blocking; - lock->l_completion_ast = cbs->lcs_completion; - lock->l_glimpse_ast = cbs->lcs_glimpse; - } - - lock->l_tree_node = NULL; - /* if this is the extent lock, allocate the interval tree node */ - if (type == LDLM_EXTENT) { - if (!ldlm_interval_alloc(lock)) { - rc = -ENOMEM; - goto out; - } - } - - if (lvb_len) { - lock->l_lvb_len = lvb_len; - lock->l_lvb_data = kzalloc(lvb_len, GFP_NOFS); - if (!lock->l_lvb_data) { - rc = -ENOMEM; - goto out; - } - } - - lock->l_lvb_type = lvb_type; - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK)) { - rc = -ENOENT; - goto out; - } - - return lock; - -out: - ldlm_lock_destroy(lock); - LDLM_LOCK_RELEASE(lock); - return ERR_PTR(rc); -} - -/** - * Enqueue (request) a lock. - * On the client this is called from ldlm_cli_enqueue_fini - * after we already got an initial reply from the server with some status. - * - * Does not block. As a result of enqueue the lock would be put - * into granted or waiting list. - */ -enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, - struct ldlm_lock **lockp, - void *cookie, __u64 *flags) -{ - struct ldlm_lock *lock = *lockp; - struct ldlm_resource *res = lock->l_resource; - - lock_res_and_lock(lock); - if (lock->l_req_mode == lock->l_granted_mode) { - /* The server returned a blocked lock, but it was granted - * before we got a chance to actually enqueue it. We don't - * need to do anything else. - */ - *flags &= ~LDLM_FL_BLOCKED_MASK; - goto out; - } - - ldlm_resource_unlink_lock(lock); - - /* Cannot happen unless on the server */ - if (res->lr_type == LDLM_EXTENT && !lock->l_tree_node) - LBUG(); - - /* Some flags from the enqueue want to make it into the AST, via the - * lock's l_flags. - */ - if (*flags & LDLM_FL_AST_DISCARD_DATA) - ldlm_set_ast_discard_data(lock); - if (*flags & LDLM_FL_TEST_LOCK) - ldlm_set_test_lock(lock); - - /* - * This distinction between local lock trees is very important; a client - * namespace only has information about locks taken by that client, and - * thus doesn't have enough information to decide for itself if it can - * be granted (below). In this case, we do exactly what the server - * tells us to do, as dictated by the 'flags'. 
- */ - if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED)) - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - else - ldlm_grant_lock(lock, NULL); - -out: - unlock_res_and_lock(lock); - return ELDLM_OK; -} - -/** - * Process a call to blocking AST callback for a lock in ast_work list - */ -static int -ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) -{ - struct ldlm_cb_set_arg *arg = opaq; - struct ldlm_lock_desc d; - int rc; - struct ldlm_lock *lock; - - if (list_empty(arg->list)) - return -ENOENT; - - lock = list_first_entry(arg->list, struct ldlm_lock, l_bl_ast); - - /* nobody should touch l_bl_ast */ - lock_res_and_lock(lock); - list_del_init(&lock->l_bl_ast); - - LASSERT(ldlm_is_ast_sent(lock)); - LASSERT(lock->l_bl_ast_run == 0); - LASSERT(lock->l_blocking_lock); - lock->l_bl_ast_run++; - unlock_res_and_lock(lock); - - ldlm_lock2desc(lock->l_blocking_lock, &d); - - rc = lock->l_blocking_ast(lock, &d, (void *)arg, LDLM_CB_BLOCKING); - LDLM_LOCK_RELEASE(lock->l_blocking_lock); - lock->l_blocking_lock = NULL; - LDLM_LOCK_RELEASE(lock); - - return rc; -} - -/** - * Process a call to completion AST callback for a lock in ast_work list - */ -static int -ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) -{ - struct ldlm_cb_set_arg *arg = opaq; - int rc = 0; - struct ldlm_lock *lock; - ldlm_completion_callback completion_callback; - - if (list_empty(arg->list)) - return -ENOENT; - - lock = list_first_entry(arg->list, struct ldlm_lock, l_cp_ast); - - /* It's possible to receive a completion AST before we've set - * the l_completion_ast pointer: either because the AST arrived - * before the reply, or simply because there's a small race - * window between receiving the reply and finishing the local - * enqueue. (bug 842) - * - * This can't happen with the blocking_ast, however, because we - * will never call the local blocking_ast until we drop our - * reader/writer reference, which we won't do until we get the - * reply and finish enqueueing. 
- */
-
- /* nobody should touch l_cp_ast */
- lock_res_and_lock(lock);
- list_del_init(&lock->l_cp_ast);
- LASSERT(ldlm_is_cp_reqd(lock));
- /* save l_completion_ast since it can be changed by
- * mds_intent_policy(), see bug 14225
- */
- completion_callback = lock->l_completion_ast;
- ldlm_clear_cp_reqd(lock);
- unlock_res_and_lock(lock);
-
- if (completion_callback)
- rc = completion_callback(lock, 0, (void *)arg);
- LDLM_LOCK_RELEASE(lock);
-
- return rc;
-}
-
-/**
- * Process a call to revocation AST callback for a lock in ast_work list
- */
-static int
-ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
-{
- struct ldlm_cb_set_arg *arg = opaq;
- struct ldlm_lock_desc desc;
- int rc;
- struct ldlm_lock *lock;
-
- if (list_empty(arg->list))
- return -ENOENT;
-
- lock = list_first_entry(arg->list, struct ldlm_lock, l_rk_ast);
- list_del_init(&lock->l_rk_ast);
-
- /* the desc just pretends to be exclusive */
- ldlm_lock2desc(lock, &desc);
- desc.l_req_mode = LCK_EX;
- desc.l_granted_mode = 0;
-
- rc = lock->l_blocking_ast(lock, &desc, (void *)arg, LDLM_CB_BLOCKING);
- LDLM_LOCK_RELEASE(lock);
-
- return rc;
-}
-
-/**
- * Process a call to glimpse AST callback for a lock in ast_work list
- */
-static int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
-{
- struct ldlm_cb_set_arg *arg = opaq;
- struct ldlm_glimpse_work *gl_work;
- struct ldlm_lock *lock;
- int rc = 0;
-
- if (list_empty(arg->list))
- return -ENOENT;
-
- gl_work = list_first_entry(arg->list, struct ldlm_glimpse_work,
- gl_list);
- list_del_init(&gl_work->gl_list);
-
- lock = gl_work->gl_lock;
-
- /* transfer the glimpse descriptor to ldlm_cb_set_arg */
- arg->gl_desc = gl_work->gl_desc;
-
- /* invoke the actual glimpse callback */
- if (lock->l_glimpse_ast(lock, (void *)arg) == 0)
- rc = 1;
-
- LDLM_LOCK_RELEASE(lock);
-
- if ((gl_work->gl_flags & LDLM_GL_WORK_NOFREE) == 0)
- kfree(gl_work);
-
- return rc;
-}
-
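/*
 * Editor's illustration (not part of the original file): the typical
 * calling pattern for ldlm_run_ast_work() (defined just below), modeled
 * on the completion-AST path elsewhere in this file. The helper name and
 * caller context are hypothetical. ldlm_grant_lock() queues CP AST work
 * on ast_list via the lock's l_cp_ast linkage; the list is then drained
 * by ldlm_work_cp_ast_lock() above, one lock per produced RPC.
 */
static void example_grant_and_send_cp_asts(struct ldlm_namespace *ns,
 struct ldlm_lock *lock)
{
 LIST_HEAD(ast_list);

 lock_res_and_lock(lock);
 ldlm_grant_lock(lock, &ast_list); /* collects pending CP AST work */
 unlock_res_and_lock(lock);

 /* Sends the ASTs, keeping at most ns_max_parallel_ast in flight. */
 ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST);
}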
-/**
- * Process a list of locks in need of ASTs being sent.
- *
- * Used on the server to send multiple ASTs together instead of sending them
- * one by one.
- */
-int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list,
- enum ldlm_desc_ast_t ast_type)
-{
- struct ldlm_cb_set_arg *arg;
- set_producer_func work_ast_lock;
- int rc;
-
- if (list_empty(rpc_list))
- return 0;
-
- arg = kzalloc(sizeof(*arg), GFP_NOFS);
- if (!arg)
- return -ENOMEM;
-
- atomic_set(&arg->restart, 0);
- arg->list = rpc_list;
-
- switch (ast_type) {
- case LDLM_WORK_BL_AST:
- arg->type = LDLM_BL_CALLBACK;
- work_ast_lock = ldlm_work_bl_ast_lock;
- break;
- case LDLM_WORK_CP_AST:
- arg->type = LDLM_CP_CALLBACK;
- work_ast_lock = ldlm_work_cp_ast_lock;
- break;
- case LDLM_WORK_REVOKE_AST:
- arg->type = LDLM_BL_CALLBACK;
- work_ast_lock = ldlm_work_revoke_ast_lock;
- break;
- case LDLM_WORK_GL_AST:
- arg->type = LDLM_GL_CALLBACK;
- work_ast_lock = ldlm_work_gl_ast_lock;
- break;
- default:
- LBUG();
- }
-
- /* We create a ptlrpc request set with the flow control extension.
- * This request set will use the work_ast_lock function to produce new
- * requests and will send a new request each time one completes, in order
- * to keep the number of requests in flight at ns_max_parallel_ast.
- */
- arg->set = ptlrpc_prep_fcset(ns->ns_max_parallel_ast ? : UINT_MAX,
- work_ast_lock, arg);
- if (!arg->set) {
- rc = -ENOMEM;
- goto out;
- }
-
- ptlrpc_set_wait(arg->set);
- ptlrpc_set_destroy(arg->set);
-
- rc = atomic_read(&arg->restart) ? -ERESTART : 0;
-out:
- kfree(arg);
- return rc;
-}
-
-static bool is_bl_done(struct ldlm_lock *lock)
-{
- bool bl_done = true;
-
- if (!ldlm_is_bl_done(lock)) {
- lock_res_and_lock(lock);
- bl_done = ldlm_is_bl_done(lock);
- unlock_res_and_lock(lock);
- }
-
- return bl_done;
-}
-
-/**
- * Helper function to call blocking AST for LDLM lock \a lock in a
- * "cancelling" mode.
- */
-void ldlm_cancel_callback(struct ldlm_lock *lock)
-{
- check_res_locked(lock->l_resource);
- if (!ldlm_is_cancel(lock)) {
- ldlm_set_cancel(lock);
- if (lock->l_blocking_ast) {
- unlock_res_and_lock(lock);
- lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
- LDLM_CB_CANCELING);
- lock_res_and_lock(lock);
- } else {
- LDLM_DEBUG(lock, "no blocking ast");
- }
- /* only canceller can set bl_done bit */
- ldlm_set_bl_done(lock);
- wake_up_all(&lock->l_waitq);
- } else if (!ldlm_is_bl_done(lock)) {
- /*
- * The lock is guaranteed to have been canceled once
- * returning from this function.
- */
- unlock_res_and_lock(lock);
- wait_event_idle(lock->l_waitq, is_bl_done(lock));
- lock_res_and_lock(lock);
- }
-}
-
-/**
- * Remove skiplist-enabled LDLM lock \a req from granted list
- */
-void ldlm_unlink_lock_skiplist(struct ldlm_lock *req)
-{
- if (req->l_resource->lr_type != LDLM_PLAIN &&
- req->l_resource->lr_type != LDLM_IBITS)
- return;
-
- list_del_init(&req->l_sl_policy);
- list_del_init(&req->l_sl_mode);
-}
-
-/**
- * Attempts to cancel LDLM lock \a lock that has no reader/writer references.
- */
-void ldlm_lock_cancel(struct ldlm_lock *lock)
-{
- struct ldlm_resource *res;
- struct ldlm_namespace *ns;
-
- lock_res_and_lock(lock);
-
- res = lock->l_resource;
- ns = ldlm_res_to_ns(res);
-
- /* Please do not, no matter how tempting, remove this LBUG without
- * talking to me first. -phik
- */
- if (lock->l_readers || lock->l_writers) {
- LDLM_ERROR(lock, "lock still has references");
- LBUG();
- }
-
- /* Releases cancel callback. */
- ldlm_cancel_callback(lock);
-
- ldlm_resource_unlink_lock(lock);
- ldlm_lock_destroy_nolock(lock);
-
- if (lock->l_granted_mode == lock->l_req_mode)
- ldlm_pool_del(&ns->ns_pool, lock);
-
- /* Make sure we will not be called again for the same lock, which is
- * possible unless lock->l_granted_mode is zeroed out
- */
- lock->l_granted_mode = LCK_MINMODE;
- unlock_res_and_lock(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_cancel);
-
-/**
- * Set opaque data into the lock that only makes sense to the upper layer.
- */
-int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data)
-{
- struct ldlm_lock *lock = ldlm_handle2lock(lockh);
- int rc = -EINVAL;
-
- if (lock) {
- if (!lock->l_ast_data)
- lock->l_ast_data = data;
- if (lock->l_ast_data == data)
- rc = 0;
- LDLM_LOCK_PUT(lock);
- }
- return rc;
-}
-EXPORT_SYMBOL(ldlm_lock_set_data);
-
-struct export_cl_data {
- struct obd_export *ecl_exp;
- int ecl_loop;
-};
-
-/**
- * Print lock with lock handle \a lockh description into debug log.
- *
- * Used when printing all locks on a resource for debug purposes.
- */
-void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh)
-{
- struct ldlm_lock *lock;
-
- if (!((libcfs_debug | D_ERROR) & level))
- return;
-
- lock = ldlm_handle2lock(lockh);
- if (!lock)
- return;
-
- LDLM_DEBUG_LIMIT(level, lock, "###");
-
- LDLM_LOCK_PUT(lock);
-}
-EXPORT_SYMBOL(ldlm_lock_dump_handle);
-
-/**
- * Print lock information with custom message into debug log.
- * Helper function.
- */
-void _ldlm_lock_debug(struct ldlm_lock *lock,
- struct libcfs_debug_msg_data *msgdata,
- const char *fmt, ...)
-{ - va_list args; - struct obd_export *exp = lock->l_export; - struct ldlm_resource *resource = lock->l_resource; - char *nid = "local"; - - va_start(args, fmt); - - if (exp && exp->exp_connection) { - nid = libcfs_nid2str(exp->exp_connection->c_peer.nid); - } else if (exp && exp->exp_obd) { - struct obd_import *imp = exp->exp_obd->u.cli.cl_import; - - nid = libcfs_nid2str(imp->imp_connection->c_peer.nid); - } - - if (!resource) { - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: \?\? lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: \?\? rrc=\?\? type: \?\?\? flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - lock, - lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - va_end(args); - return; - } - - switch (resource->lr_type) { - case LDLM_EXTENT: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s [%llu->%llu] (req %llu->%llu) flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), lock, - lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_policy_data.l_extent.start, - lock->l_policy_data.l_extent.end, - lock->l_req_extent.start, - lock->l_req_extent.end, - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - break; - - case LDLM_FLOCK: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s pid: %d [%llu->%llu] flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu\n", - ldlm_lock_to_ns_name(lock), lock, - lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_policy_data.l_flock.pid, - lock->l_policy_data.l_flock.start, - lock->l_policy_data.l_flock.end, - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout); - break; - - case LDLM_IBITS: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), - lock, lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - lock->l_policy_data.l_inodebits.bits, - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? 
atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - break; - - default: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), - lock, lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - break; - } - va_end(args); -} -EXPORT_SYMBOL(_ldlm_lock_debug); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c deleted file mode 100644 index c772c68e5a49..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ /dev/null @@ -1,1163 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_lockd.c - * - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <linux/libcfs/libcfs.h> -#include <lustre_dlm.h> -#include <obd_class.h> -#include <linux/list.h> -#include "ldlm_internal.h" - -static int ldlm_num_threads; -module_param(ldlm_num_threads, int, 0444); -MODULE_PARM_DESC(ldlm_num_threads, "number of DLM service threads to start"); - -static char *ldlm_cpts; -module_param(ldlm_cpts, charp, 0444); -MODULE_PARM_DESC(ldlm_cpts, "CPU partitions ldlm threads should run on"); - -static struct mutex ldlm_ref_mutex; -static int ldlm_refcount; - -static struct kobject *ldlm_kobj; -struct kset *ldlm_ns_kset; -static struct kset *ldlm_svc_kset; - -struct ldlm_cb_async_args { - struct ldlm_cb_set_arg *ca_set_arg; - struct ldlm_lock *ca_lock; -}; - -/* LDLM state */ - -static struct ldlm_state *ldlm_state; - -#define ELT_STOPPED 0 -#define ELT_READY 1 -#define ELT_TERMINATE 2 - -struct ldlm_bl_pool { - spinlock_t blp_lock; - - /* - * blp_prio_list is used for callbacks that should be handled - * as a priority. It is used for LDLM_FL_DISCARD_DATA requests. 
- * see bug 13843 - */ - struct list_head blp_prio_list; - - /* - * blp_list is used for all other callbacks which are likely - * to take longer to process. - */ - struct list_head blp_list; - - wait_queue_head_t blp_waitq; - struct completion blp_comp; - atomic_t blp_num_threads; - atomic_t blp_busy_threads; - int blp_min_threads; - int blp_max_threads; -}; - -struct ldlm_bl_work_item { - struct list_head blwi_entry; - struct ldlm_namespace *blwi_ns; - struct ldlm_lock_desc blwi_ld; - struct ldlm_lock *blwi_lock; - struct list_head blwi_head; - int blwi_count; - struct completion blwi_comp; - enum ldlm_cancel_flags blwi_flags; - int blwi_mem_pressure; -}; - -/** - * Callback handler for receiving incoming blocking ASTs. - * - * This can only happen on client side. - */ -void ldlm_handle_bl_callback(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, struct ldlm_lock *lock) -{ - int do_ast; - - LDLM_DEBUG(lock, "client blocking AST callback handler"); - - lock_res_and_lock(lock); - ldlm_set_cbpending(lock); - - if (ldlm_is_cancel_on_block(lock)) - ldlm_set_cancel(lock); - - do_ast = !lock->l_readers && !lock->l_writers; - unlock_res_and_lock(lock); - - if (do_ast) { - CDEBUG(D_DLMTRACE, - "Lock %p already unused, calling callback (%p)\n", lock, - lock->l_blocking_ast); - if (lock->l_blocking_ast) - lock->l_blocking_ast(lock, ld, lock->l_ast_data, - LDLM_CB_BLOCKING); - } else { - CDEBUG(D_DLMTRACE, - "Lock %p is referenced, will be cancelled later\n", - lock); - } - - LDLM_DEBUG(lock, "client blocking callback handler END"); - LDLM_LOCK_RELEASE(lock); -} - -/** - * Callback handler for receiving incoming completion ASTs. - * - * This only can happen on client side. - */ -static void ldlm_handle_cp_callback(struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_request *dlm_req, - struct ldlm_lock *lock) -{ - int lvb_len; - LIST_HEAD(ast_list); - int rc = 0; - - LDLM_DEBUG(lock, "client completion callback handler START"); - - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { - int to = HZ; - - while (to > 0) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(to); - if (lock->l_granted_mode == lock->l_req_mode || - ldlm_is_destroyed(lock)) - break; - } - } - - lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT); - if (lvb_len < 0) { - LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", lvb_len); - rc = lvb_len; - goto out; - } else if (lvb_len > 0) { - if (lock->l_lvb_len > 0) { - /* for extent lock, lvb contains ost_lvb{}. 
*/ - LASSERT(lock->l_lvb_data); - - if (unlikely(lock->l_lvb_len < lvb_len)) { - LDLM_ERROR(lock, - "Replied LVB is larger than expectation, expected = %d, replied = %d", - lock->l_lvb_len, lvb_len); - rc = -EINVAL; - goto out; - } - } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has - * variable length - */ - void *lvb_data; - - lvb_data = kzalloc(lvb_len, GFP_NOFS); - if (!lvb_data) { - LDLM_ERROR(lock, "No memory: %d.\n", lvb_len); - rc = -ENOMEM; - goto out; - } - - lock_res_and_lock(lock); - LASSERT(!lock->l_lvb_data); - lock->l_lvb_type = LVB_T_LAYOUT; - lock->l_lvb_data = lvb_data; - lock->l_lvb_len = lvb_len; - unlock_res_and_lock(lock); - } - } - - lock_res_and_lock(lock); - if (ldlm_is_destroyed(lock) || - lock->l_granted_mode == lock->l_req_mode) { - /* bug 11300: the lock has already been granted */ - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "Double grant race happened"); - rc = 0; - goto out; - } - - /* If we receive the completion AST before the actual enqueue returned, - * then we might need to switch lock modes, resources, or extents. - */ - if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { - lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; - LDLM_DEBUG(lock, "completion AST, new lock mode"); - } - - if (lock->l_resource->lr_type != LDLM_PLAIN) { - ldlm_convert_policy_to_local(req->rq_export, - dlm_req->lock_desc.l_resource.lr_type, - &dlm_req->lock_desc.l_policy_data, - &lock->l_policy_data); - LDLM_DEBUG(lock, "completion AST, new policy data"); - } - - ldlm_resource_unlink_lock(lock); - if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, - &lock->l_resource->lr_name, - sizeof(lock->l_resource->lr_name)) != 0) { - unlock_res_and_lock(lock); - rc = ldlm_lock_change_resource(ns, lock, - &dlm_req->lock_desc.l_resource.lr_name); - if (rc < 0) { - LDLM_ERROR(lock, "Failed to allocate resource"); - goto out; - } - LDLM_DEBUG(lock, "completion AST, new resource"); - CERROR("change resource!\n"); - lock_res_and_lock(lock); - } - - if (dlm_req->lock_flags & LDLM_FL_AST_SENT) { - /* BL_AST locks are not needed in LRU. - * Let ldlm_cancel_lru() be fast. - */ - ldlm_lock_remove_from_lru(lock); - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; - LDLM_DEBUG(lock, "completion AST includes blocking AST"); - } - - if (lock->l_lvb_len > 0) { - rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT, - lock->l_lvb_data, lvb_len); - if (rc < 0) { - unlock_res_and_lock(lock); - goto out; - } - } - - ldlm_grant_lock(lock, &ast_list); - unlock_res_and_lock(lock); - - LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work"); - - /* Let Enqueue to call osc_lock_upcall() and initialize l_ast_data */ - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2); - - ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST); - - LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)", - lock); - goto out; - -out: - if (rc < 0) { - lock_res_and_lock(lock); - ldlm_set_failed(lock); - unlock_res_and_lock(lock); - wake_up(&lock->l_waitq); - } - LDLM_LOCK_RELEASE(lock); -} - -/** - * Callback handler for receiving incoming glimpse ASTs. - * - * This only can happen on client side. After handling the glimpse AST - * we also consider dropping the lock here if it is unused locally for a - * long time. 
- */ -static void ldlm_handle_gl_callback(struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_request *dlm_req, - struct ldlm_lock *lock) -{ - int rc = -ENOSYS; - - LDLM_DEBUG(lock, "client glimpse AST callback handler"); - - if (lock->l_glimpse_ast) - rc = lock->l_glimpse_ast(lock, req); - - if (req->rq_repmsg) { - ptlrpc_reply(req); - } else { - req->rq_status = rc; - ptlrpc_error(req); - } - - lock_res_and_lock(lock); - if (lock->l_granted_mode == LCK_PW && - !lock->l_readers && !lock->l_writers && - cfs_time_after(cfs_time_current(), - cfs_time_add(lock->l_last_used, - 10 * HZ))) { - unlock_res_and_lock(lock); - if (ldlm_bl_to_thread_lock(ns, NULL, lock)) - ldlm_handle_bl_callback(ns, NULL, lock); - - return; - } - unlock_res_and_lock(lock); - LDLM_LOCK_RELEASE(lock); -} - -static int ldlm_callback_reply(struct ptlrpc_request *req, int rc) -{ - if (req->rq_no_reply) - return 0; - - req->rq_status = rc; - if (!req->rq_packed_final) { - rc = lustre_pack_reply(req, 1, NULL, NULL); - if (rc) - return rc; - } - return ptlrpc_reply(req); -} - -static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, - enum ldlm_cancel_flags cancel_flags) -{ - struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; - - spin_lock(&blp->blp_lock); - if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) { - /* add LDLM_FL_DISCARD_DATA requests to the priority list */ - list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list); - } else { - /* other blocking callbacks are added to the regular list */ - list_add_tail(&blwi->blwi_entry, &blp->blp_list); - } - spin_unlock(&blp->blp_lock); - - wake_up(&blp->blp_waitq); - - /* can not check blwi->blwi_flags as blwi could be already freed in - * LCF_ASYNC mode - */ - if (!(cancel_flags & LCF_ASYNC)) - wait_for_completion(&blwi->blwi_comp); - - return 0; -} - -static inline void init_blwi(struct ldlm_bl_work_item *blwi, - struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct list_head *cancels, int count, - struct ldlm_lock *lock, - enum ldlm_cancel_flags cancel_flags) -{ - init_completion(&blwi->blwi_comp); - INIT_LIST_HEAD(&blwi->blwi_head); - - if (memory_pressure_get()) - blwi->blwi_mem_pressure = 1; - - blwi->blwi_ns = ns; - blwi->blwi_flags = cancel_flags; - if (ld) - blwi->blwi_ld = *ld; - if (count) { - list_add(&blwi->blwi_head, cancels); - list_del_init(cancels); - blwi->blwi_count = count; - } else { - blwi->blwi_lock = lock; - } -} - -/** - * Queues a list of locks \a cancels containing \a count locks - * for later processing by a blocking thread. If \a count is zero, - * then the lock referenced as \a lock is queued instead. - * - * The blocking thread would then call ->l_blocking_ast callback in the lock. - * If list addition fails an error is returned and caller is supposed to - * call ->l_blocking_ast itself. 
- */ -static int ldlm_bl_to_thread(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct ldlm_lock *lock, - struct list_head *cancels, int count, - enum ldlm_cancel_flags cancel_flags) -{ - if (cancels && count == 0) - return 0; - - if (cancel_flags & LCF_ASYNC) { - struct ldlm_bl_work_item *blwi; - - blwi = kzalloc(sizeof(*blwi), GFP_NOFS); - if (!blwi) - return -ENOMEM; - init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags); - - return __ldlm_bl_to_thread(blwi, cancel_flags); - } else { - /* if it is synchronous call do minimum mem alloc, as it could - * be triggered from kernel shrinker - */ - struct ldlm_bl_work_item blwi; - - memset(&blwi, 0, sizeof(blwi)); - init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags); - return __ldlm_bl_to_thread(&blwi, cancel_flags); - } -} - -int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct ldlm_lock *lock) -{ - return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC); -} - -int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct list_head *cancels, int count, - enum ldlm_cancel_flags cancel_flags) -{ - return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags); -} - -int ldlm_bl_thread_wakeup(void) -{ - wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq); - return 0; -} - -/* Setinfo coming from Server (eg MDT) to Client (eg MDC)! */ -static int ldlm_handle_setinfo(struct ptlrpc_request *req) -{ - struct obd_device *obd = req->rq_export->exp_obd; - char *key; - void *val; - int keylen, vallen; - int rc = -ENOSYS; - - DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name); - - req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO); - - key = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); - if (!key) { - DEBUG_REQ(D_IOCTL, req, "no set_info key"); - return -EFAULT; - } - keylen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_KEY, - RCL_CLIENT); - val = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL); - if (!val) { - DEBUG_REQ(D_IOCTL, req, "no set_info val"); - return -EFAULT; - } - vallen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_VAL, - RCL_CLIENT); - - /* We are responsible for swabbing contents of val */ - - if (KEY_IS(KEY_HSM_COPYTOOL_SEND)) - /* Pass it on to mdc (the "export" in this case) */ - rc = obd_set_info_async(req->rq_svc_thread->t_env, - req->rq_export, - sizeof(KEY_HSM_COPYTOOL_SEND), - KEY_HSM_COPYTOOL_SEND, - vallen, val, NULL); - else - DEBUG_REQ(D_WARNING, req, "ignoring unknown key %s", key); - - return rc; -} - -static inline void ldlm_callback_errmsg(struct ptlrpc_request *req, - const char *msg, int rc, - const struct lustre_handle *handle) -{ - DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req, - "%s: [nid %s] [rc %d] [lock %#llx]", - msg, libcfs_id2str(req->rq_peer), rc, - handle ? handle->cookie : 0); - if (req->rq_no_reply) - CWARN("No reply was sent, maybe cause bug 21636.\n"); - else if (rc) - CWARN("Send reply failed, maybe cause bug 21636.\n"); -} - -/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ -static int ldlm_callback_handler(struct ptlrpc_request *req) -{ - struct ldlm_namespace *ns; - struct ldlm_request *dlm_req; - struct ldlm_lock *lock; - int rc; - - /* Requests arrive in sender's byte order. The ptlrpc service - * handler has already checked and, if necessary, byte-swapped the - * incoming request message body, but I am responsible for the - * message buffers. 
- */ - - /* do nothing for sec context finalize */ - if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI) - return 0; - - req_capsule_init(&req->rq_pill, req, RCL_SERVER); - - if (!req->rq_export) { - rc = ldlm_callback_reply(req, -ENOTCONN); - ldlm_callback_errmsg(req, "Operate on unconnected server", - rc, NULL); - return 0; - } - - LASSERT(req->rq_export->exp_obd); - - switch (lustre_msg_get_opc(req->rq_reqmsg)) { - case LDLM_BL_CALLBACK: - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) { - if (cfs_fail_err) - ldlm_callback_reply(req, -(int)cfs_fail_err); - return 0; - } - break; - case LDLM_CP_CALLBACK: - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET)) - return 0; - break; - case LDLM_GL_CALLBACK: - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET)) - return 0; - break; - case LDLM_SET_INFO: - rc = ldlm_handle_setinfo(req); - ldlm_callback_reply(req, rc); - return 0; - default: - CERROR("unknown opcode %u\n", - lustre_msg_get_opc(req->rq_reqmsg)); - ldlm_callback_reply(req, -EPROTO); - return 0; - } - - ns = req->rq_export->exp_obd->obd_namespace; - LASSERT(ns); - - req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK); - - dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - if (!dlm_req) { - rc = ldlm_callback_reply(req, -EPROTO); - ldlm_callback_errmsg(req, "Operate without parameter", rc, - NULL); - return 0; - } - - /* Force a known safe race, send a cancel to the server for a lock - * which the server has already started a blocking callback on. - */ - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) && - lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { - rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0); - if (rc < 0) - CERROR("ldlm_cli_cancel: %d\n", rc); - } - - lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0); - if (!lock) { - CDEBUG(D_DLMTRACE, - "callback on lock %#llx - lock disappeared\n", - dlm_req->lock_handle[0].cookie); - rc = ldlm_callback_reply(req, -EINVAL); - ldlm_callback_errmsg(req, "Operate with invalid parameter", rc, - &dlm_req->lock_handle[0]); - return 0; - } - - if (ldlm_is_fail_loc(lock) && - lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) - OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - - /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ - lock_res_and_lock(lock); - lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & - LDLM_FL_AST_MASK); - if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { - /* If somebody cancels lock and cache is already dropped, - * or lock is failed before cp_ast received on client, - * we can tell the server we have no lock. Otherwise, we - * should send cancel after dropping the cache. - */ - if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) || - ldlm_is_failed(lock)) { - LDLM_DEBUG(lock, - "callback on lock %#llx - lock disappeared", - dlm_req->lock_handle[0].cookie); - unlock_res_and_lock(lock); - LDLM_LOCK_RELEASE(lock); - rc = ldlm_callback_reply(req, -EINVAL); - ldlm_callback_errmsg(req, "Operate on stale lock", rc, - &dlm_req->lock_handle[0]); - return 0; - } - /* BL_AST locks are not needed in LRU. - * Let ldlm_cancel_lru() be fast. - */ - ldlm_lock_remove_from_lru(lock); - ldlm_set_bl_ast(lock); - } - unlock_res_and_lock(lock); - - /* We want the ost thread to get this reply so that it can respond - * to ost requests (write cache writeback) that might be triggered - * in the callback. 
- * - * But we'd also like to be able to indicate in the reply that we're - * cancelling right now, because it's unused, or have an intent result - * in the reply, so we might have to push the responsibility for sending - * the reply down into the AST handlers, alas. - */ - - switch (lustre_msg_get_opc(req->rq_reqmsg)) { - case LDLM_BL_CALLBACK: - CDEBUG(D_INODE, "blocking ast\n"); - req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK); - if (!ldlm_is_cancel_on_block(lock)) { - rc = ldlm_callback_reply(req, 0); - if (req->rq_no_reply || rc) - ldlm_callback_errmsg(req, "Normal process", rc, - &dlm_req->lock_handle[0]); - } - if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock)) - ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock); - break; - case LDLM_CP_CALLBACK: - CDEBUG(D_INODE, "completion ast\n"); - req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK); - ldlm_callback_reply(req, 0); - ldlm_handle_cp_callback(req, ns, dlm_req, lock); - break; - case LDLM_GL_CALLBACK: - CDEBUG(D_INODE, "glimpse ast\n"); - req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK); - ldlm_handle_gl_callback(req, ns, dlm_req, lock); - break; - default: - LBUG(); /* checked above */ - } - - return 0; -} - -static int ldlm_bl_get_work(struct ldlm_bl_pool *blp, - struct ldlm_bl_work_item **p_blwi, - struct obd_export **p_exp) -{ - int num_th = atomic_read(&blp->blp_num_threads); - struct ldlm_bl_work_item *blwi = NULL; - static unsigned int num_bl; - - spin_lock(&blp->blp_lock); - /* process a request from the blp_list at least every blp_num_threads */ - if (!list_empty(&blp->blp_list) && - (list_empty(&blp->blp_prio_list) || num_bl == 0)) - blwi = list_first_entry(&blp->blp_list, - struct ldlm_bl_work_item, blwi_entry); - else - if (!list_empty(&blp->blp_prio_list)) - blwi = list_first_entry(&blp->blp_prio_list, - struct ldlm_bl_work_item, - blwi_entry); - - if (blwi) { - if (++num_bl >= num_th) - num_bl = 0; - list_del(&blwi->blwi_entry); - } - spin_unlock(&blp->blp_lock); - *p_blwi = blwi; - - return (*p_blwi || *p_exp) ? 
1 : 0; -} - -/* This only contains temporary data until the thread starts */ -struct ldlm_bl_thread_data { - struct ldlm_bl_pool *bltd_blp; - struct completion bltd_comp; - int bltd_num; -}; - -static int ldlm_bl_thread_main(void *arg); - -static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp, bool check_busy) -{ - struct ldlm_bl_thread_data bltd = { .bltd_blp = blp }; - struct task_struct *task; - - init_completion(&bltd.bltd_comp); - - bltd.bltd_num = atomic_inc_return(&blp->blp_num_threads); - if (bltd.bltd_num >= blp->blp_max_threads) { - atomic_dec(&blp->blp_num_threads); - return 0; - } - - LASSERTF(bltd.bltd_num > 0, "thread num:%d\n", bltd.bltd_num); - if (check_busy && - atomic_read(&blp->blp_busy_threads) < (bltd.bltd_num - 1)) { - atomic_dec(&blp->blp_num_threads); - return 0; - } - - task = kthread_run(ldlm_bl_thread_main, &bltd, "ldlm_bl_%02d", - bltd.bltd_num); - if (IS_ERR(task)) { - CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n", - bltd.bltd_num, PTR_ERR(task)); - atomic_dec(&blp->blp_num_threads); - return PTR_ERR(task); - } - wait_for_completion(&bltd.bltd_comp); - - return 0; -} - -/* Not fatal if racy and have a few too many threads */ -static int ldlm_bl_thread_need_create(struct ldlm_bl_pool *blp, - struct ldlm_bl_work_item *blwi) -{ - if (atomic_read(&blp->blp_num_threads) >= blp->blp_max_threads) - return 0; - - if (atomic_read(&blp->blp_busy_threads) < - atomic_read(&blp->blp_num_threads)) - return 0; - - if (blwi && (!blwi->blwi_ns || blwi->blwi_mem_pressure)) - return 0; - - return 1; -} - -static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp, - struct ldlm_bl_work_item *blwi) -{ - if (!blwi->blwi_ns) - /* added by ldlm_cleanup() */ - return LDLM_ITER_STOP; - - if (blwi->blwi_mem_pressure) - memory_pressure_set(); - - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4); - - if (blwi->blwi_count) { - int count; - - /* - * The special case when we cancel locks in lru - * asynchronously, we pass the list of locks here. - * Thus locks are marked LDLM_FL_CANCELING, but NOT - * canceled locally yet. - */ - count = ldlm_cli_cancel_list_local(&blwi->blwi_head, - blwi->blwi_count, - LCF_BL_AST); - ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL, - blwi->blwi_flags); - } else { - ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld, - blwi->blwi_lock); - } - if (blwi->blwi_mem_pressure) - memory_pressure_clr(); - - if (blwi->blwi_flags & LCF_ASYNC) - kfree(blwi); - else - complete(&blwi->blwi_comp); - - return 0; -} - -/** - * Main blocking requests processing thread. - * - * Callers put locks into its queue by calling ldlm_bl_to_thread. - * This thread in the end ends up doing actual call to ->l_blocking_ast - * for queued locks. 
- */ -static int ldlm_bl_thread_main(void *arg) -{ - struct ldlm_bl_pool *blp; - struct ldlm_bl_thread_data *bltd = arg; - - blp = bltd->bltd_blp; - - complete(&bltd->bltd_comp); - /* cannot use bltd after this, it is only on caller's stack */ - - while (1) { - struct ldlm_bl_work_item *blwi = NULL; - struct obd_export *exp = NULL; - int rc; - - rc = ldlm_bl_get_work(blp, &blwi, &exp); - if (!rc) - wait_event_idle_exclusive(blp->blp_waitq, - ldlm_bl_get_work(blp, &blwi, - &exp)); - atomic_inc(&blp->blp_busy_threads); - - if (ldlm_bl_thread_need_create(blp, blwi)) - /* discard the return value, we tried */ - ldlm_bl_thread_start(blp, true); - - if (blwi) - rc = ldlm_bl_thread_blwi(blp, blwi); - - atomic_dec(&blp->blp_busy_threads); - - if (rc == LDLM_ITER_STOP) - break; - } - - atomic_dec(&blp->blp_num_threads); - complete(&blp->blp_comp); - return 0; -} - -static int ldlm_setup(void); -static int ldlm_cleanup(void); - -int ldlm_get_ref(void) -{ - int rc = 0; - - rc = ptlrpc_inc_ref(); - if (rc) - return rc; - - mutex_lock(&ldlm_ref_mutex); - if (++ldlm_refcount == 1) { - rc = ldlm_setup(); - if (rc) - ldlm_refcount--; - } - mutex_unlock(&ldlm_ref_mutex); - - if (rc) - ptlrpc_dec_ref(); - - return rc; -} - -void ldlm_put_ref(void) -{ - int rc = 0; - mutex_lock(&ldlm_ref_mutex); - if (ldlm_refcount == 1) { - rc = ldlm_cleanup(); - - if (rc) - CERROR("ldlm_cleanup failed: %d\n", rc); - else - ldlm_refcount--; - } else { - ldlm_refcount--; - } - mutex_unlock(&ldlm_ref_mutex); - if (!rc) - ptlrpc_dec_ref(); -} - -static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", ldlm_cancel_unused_locks_before_replay); -} - -static ssize_t cancel_unused_locks_before_replay_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - ldlm_cancel_unused_locks_before_replay = val; - - return count; -} -LUSTRE_RW_ATTR(cancel_unused_locks_before_replay); - -/* These are for root of /sys/fs/lustre/ldlm */ -static struct attribute *ldlm_attrs[] = { - &lustre_attr_cancel_unused_locks_before_replay.attr, - NULL, -}; - -static const struct attribute_group ldlm_attr_group = { - .attrs = ldlm_attrs, -}; - -static int ldlm_setup(void) -{ - static struct ptlrpc_service_conf conf; - struct ldlm_bl_pool *blp = NULL; - int rc = 0; - int i; - - if (ldlm_state) - return -EALREADY; - - ldlm_state = kzalloc(sizeof(*ldlm_state), GFP_NOFS); - if (!ldlm_state) - return -ENOMEM; - - ldlm_kobj = kobject_create_and_add("ldlm", lustre_kobj); - if (!ldlm_kobj) { - rc = -ENOMEM; - goto out; - } - - rc = sysfs_create_group(ldlm_kobj, &ldlm_attr_group); - if (rc) - goto out; - - ldlm_ns_kset = kset_create_and_add("namespaces", NULL, ldlm_kobj); - if (!ldlm_ns_kset) { - rc = -ENOMEM; - goto out; - } - - ldlm_svc_kset = kset_create_and_add("services", NULL, ldlm_kobj); - if (!ldlm_svc_kset) { - rc = -ENOMEM; - goto out; - } - - rc = ldlm_debugfs_setup(); - if (rc != 0) - goto out; - - memset(&conf, 0, sizeof(conf)); - conf = (typeof(conf)) { - .psc_name = "ldlm_cbd", - .psc_watchdog_factor = 2, - .psc_buf = { - .bc_nbufs = LDLM_CLIENT_NBUFS, - .bc_buf_size = LDLM_BUFSIZE, - .bc_req_max_size = LDLM_MAXREQSIZE, - .bc_rep_max_size = LDLM_MAXREPSIZE, - .bc_req_portal = LDLM_CB_REQUEST_PORTAL, - .bc_rep_portal = LDLM_CB_REPLY_PORTAL, - }, - .psc_thr = { - .tc_thr_name = "ldlm_cb", - .tc_thr_factor = LDLM_THR_FACTOR, - 
.tc_nthrs_init = LDLM_NTHRS_INIT, - .tc_nthrs_base = LDLM_NTHRS_BASE, - .tc_nthrs_max = LDLM_NTHRS_MAX, - .tc_nthrs_user = ldlm_num_threads, - .tc_cpu_affinity = 1, - .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, - }, - .psc_cpt = { - .cc_pattern = ldlm_cpts, - }, - .psc_ops = { - .so_req_handler = ldlm_callback_handler, - }, - }; - ldlm_state->ldlm_cb_service = - ptlrpc_register_service(&conf, ldlm_svc_kset, - ldlm_svc_debugfs_dir); - if (IS_ERR(ldlm_state->ldlm_cb_service)) { - CERROR("failed to start service\n"); - rc = PTR_ERR(ldlm_state->ldlm_cb_service); - ldlm_state->ldlm_cb_service = NULL; - goto out; - } - - blp = kzalloc(sizeof(*blp), GFP_NOFS); - if (!blp) { - rc = -ENOMEM; - goto out; - } - ldlm_state->ldlm_bl_pool = blp; - - spin_lock_init(&blp->blp_lock); - INIT_LIST_HEAD(&blp->blp_list); - INIT_LIST_HEAD(&blp->blp_prio_list); - init_waitqueue_head(&blp->blp_waitq); - atomic_set(&blp->blp_num_threads, 0); - atomic_set(&blp->blp_busy_threads, 0); - - if (ldlm_num_threads == 0) { - blp->blp_min_threads = LDLM_NTHRS_INIT; - blp->blp_max_threads = LDLM_NTHRS_MAX; - } else { - blp->blp_min_threads = min_t(int, LDLM_NTHRS_MAX, - max_t(int, LDLM_NTHRS_INIT, - ldlm_num_threads)); - - blp->blp_max_threads = blp->blp_min_threads; - } - - for (i = 0; i < blp->blp_min_threads; i++) { - rc = ldlm_bl_thread_start(blp, false); - if (rc < 0) - goto out; - } - - rc = ldlm_pools_init(); - if (rc) { - CERROR("Failed to initialize LDLM pools: %d\n", rc); - goto out; - } - return 0; - - out: - ldlm_cleanup(); - return rc; -} - -static int ldlm_cleanup(void) -{ - if (!list_empty(ldlm_namespace_list(LDLM_NAMESPACE_SERVER)) || - !list_empty(ldlm_namespace_list(LDLM_NAMESPACE_CLIENT))) { - CERROR("ldlm still has namespaces; clean these up first.\n"); - ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE); - ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE); - return -EBUSY; - } - - ldlm_pools_fini(); - - if (ldlm_state->ldlm_bl_pool) { - struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; - - while (atomic_read(&blp->blp_num_threads) > 0) { - struct ldlm_bl_work_item blwi = { .blwi_ns = NULL }; - - init_completion(&blp->blp_comp); - - spin_lock(&blp->blp_lock); - list_add_tail(&blwi.blwi_entry, &blp->blp_list); - wake_up(&blp->blp_waitq); - spin_unlock(&blp->blp_lock); - - wait_for_completion(&blp->blp_comp); - } - - kfree(blp); - } - - if (ldlm_state->ldlm_cb_service) - ptlrpc_unregister_service(ldlm_state->ldlm_cb_service); - - if (ldlm_ns_kset) - kset_unregister(ldlm_ns_kset); - if (ldlm_svc_kset) - kset_unregister(ldlm_svc_kset); - if (ldlm_kobj) { - sysfs_remove_group(ldlm_kobj, &ldlm_attr_group); - kobject_put(ldlm_kobj); - } - - ldlm_debugfs_cleanup(); - - kfree(ldlm_state); - ldlm_state = NULL; - - return 0; -} - -int ldlm_init(void) -{ - mutex_init(&ldlm_ref_mutex); - mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER)); - mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - ldlm_resource_slab = kmem_cache_create("ldlm_resources", - sizeof(struct ldlm_resource), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!ldlm_resource_slab) - return -ENOMEM; - - ldlm_lock_slab = kmem_cache_create("ldlm_locks", - sizeof(struct ldlm_lock), 0, - SLAB_HWCACHE_ALIGN | - SLAB_TYPESAFE_BY_RCU, NULL); - if (!ldlm_lock_slab) { - kmem_cache_destroy(ldlm_resource_slab); - return -ENOMEM; - } - - ldlm_interval_slab = kmem_cache_create("interval_node", - sizeof(struct ldlm_interval), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ldlm_interval_slab) { - kmem_cache_destroy(ldlm_resource_slab); - 
kmem_cache_destroy(ldlm_lock_slab); - return -ENOMEM; - } -#if LUSTRE_TRACKS_LOCK_EXP_REFS - class_export_dump_hook = ldlm_dump_export_locks; -#endif - return 0; -} - -void ldlm_exit(void) -{ - if (ldlm_refcount) - CERROR("ldlm_refcount is %d in %s!\n", ldlm_refcount, __func__); - kmem_cache_destroy(ldlm_resource_slab); - /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call - * synchronize_rcu() to wait a grace period elapsed, so that - * ldlm_lock_free() get a chance to be called. - */ - synchronize_rcu(); - kmem_cache_destroy(ldlm_lock_slab); - kmem_cache_destroy(ldlm_interval_slab); -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c deleted file mode 100644 index 33b5a3f96fcb..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_plain.c - * - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - */ - -/** - * This file contains implementation of PLAIN lock type. - * - * PLAIN locks are the simplest form of LDLM locking, and are used when - * there only needs to be a single lock on a resource. This avoids some - * of the complexity of EXTENT and IBITS lock types, but doesn't allow - * different "parts" of a resource to be locked concurrently. Example - * use cases for PLAIN locks include locking of MGS configuration logs - * and (as of Lustre 2.4) quota records. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <lustre_dlm.h> -#include <obd_support.h> -#include <lustre_lib.h> - -#include "ldlm_internal.h" - -void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - /* No policy for plain locks */ -} - -void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - /* No policy for plain locks */ -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c deleted file mode 100644 index 53b8f33e54b5..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ /dev/null @@ -1,1023 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2010, 2015, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/ldlm/ldlm_pool.c
- *
- * Author: Yury Umanets <umka@clusterfs.com>
- */
-
-/*
- * The idea of this code is rather simple. Each second, for each server
- * namespace, we have an SLV - server lock volume - which is calculated from
- * the current number of granted locks, the grant speed for the past period,
- * etc. - that is, the locking load. This SLV number may be thought of as a
- * flow definition for simplicity. It is sent to clients at every opportunity
- * to let them know the current load situation on the server. By default, at
- * the beginning, the SLV on the server is set to the maximum value,
- * calculated as follows: allow one client to hold all ->pl_limit locks
- * for 10h.
- *
- * Next, on clients, the number of cached locks is not limited artificially
- * in any way as it was before. Instead, the client calculates a CLV, that is,
- * a client lock volume, for each lock and compares it with the last SLV from
- * the server. The CLV is calculated as the number of locks in the LRU * lock
- * live time in seconds. If CLV > SLV, the lock is canceled.
- *
- * The client has an LVF, that is, a lock volume factor, which regulates how
- * sensitive the client should be to the last SLV from the server. The higher
- * the LVF, the more locks will be canceled on the client. Its default value
- * is 1. Setting LVF to 2 means that the client will cancel locks 2 times
- * faster.
- *
- * Locks on a client will be canceled more intensively in these cases:
- * (1) if the SLV is smaller, that is, the load is higher on the server;
- * (2) the client has a lot of locks (the more locks a client holds, the
- * bigger the chances that some of them should be canceled);
- * (3) the client has old locks (taken some time ago);
- *
- * Thus, in the flow paradigm that we use to better understand the SLV, the
- * CLV is the volume of a particle in the flow described by the SLV.
- * Accordingly, if the flow is getting thinner, more and more particles fall
- * outside of it, and as the particles are locks, they should be canceled.
- *
- * The general idea of this belongs to Vitaly Fertman (vitaly@clusterfs.com).
- * Andreas Dilger (adilger@clusterfs.com) proposed a few nice ideas like using
- * the LVF and many cleanups. The flow definition that allows easier
- * understanding of the logic belongs to Nikita Danilov (nikita@clusterfs.com),
- * as well as many cleanups and fixes. The design and implementation are by
- * Yury Umanets (umka@clusterfs.com).
- *
- * Glossary for terms used:
- *
- * pl_limit - Number of allowed locks in pool. Applies to server and client
- * side (tunable);
- *
- * pl_granted - Number of granted locks (calculated);
- * pl_grant_rate - Number of granted locks for last T (calculated);
- * pl_cancel_rate - Number of canceled locks for last T (calculated);
- * pl_grant_speed - Grant speed (GR - CR) for last T (calculated);
- * pl_grant_plan - Planned number of granted locks for next T (calculated);
- * pl_server_lock_volume - Current server lock volume (calculated);
- *
- * As may be seen from the list above, we have a few tunables which can
- * affect behavior considerably. They may all be modified via sysfs. They
- * also make it possible to construct several pre-defined behavior policies.
- * If none of the predefined policies suits the working pattern in use, a new
- * one may be "constructed" via the sysfs tunables.
- */
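/*
 * Editor's illustration (not part of the original file): a minimal sketch
 * of the cancellation rule described above. The function and its parameters
 * are hypothetical; only the definitions come from the comment above:
 * CLV = number of locks in the LRU * lock live time in seconds, scaled by
 * the lock volume factor, and a lock is canceled when CLV > SLV.
 */
static inline int example_clv_says_cancel(__u64 lru_count, __u64 lock_age_sec,
 __u64 lvf, __u64 slv)
{
 /* Client lock volume for one lock, made LVF times more aggressive. */
 __u64 clv = lru_count * lock_age_sec * lvf;

 return clv > slv; /* cancel when client volume exceeds server volume */
}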
-
-#define DEBUG_SUBSYSTEM S_LDLM
-
-#include <lustre_dlm.h>
-#include <cl_object.h>
-#include <obd_class.h>
-#include <obd_support.h>
-#include "ldlm_internal.h"
-
-/*
- * 50 ldlm locks for 1MB of RAM.
- */
-#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_SHIFT)) * 50)
-
-/*
- * Maximal possible grant step plan in %.
- */
-#define LDLM_POOL_MAX_GSP (30)
-
-/*
- * Minimal possible grant step plan in %.
- */
-#define LDLM_POOL_MIN_GSP (1)
-
-/*
- * This controls the speed of reaching LDLM_POOL_MAX_GSP
- * with increasing thread period.
- */
-#define LDLM_POOL_GSP_STEP_SHIFT (2)
-
-/*
- * LDLM_POOL_MAX_GSP% of all locks is the default GP.
- */
-#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_MAX_GSP) / 100)
-
-/*
- * Max age for locks on clients.
- */
-#define LDLM_POOL_MAX_AGE (36000)
-
-/*
- * The granularity of SLV calculation.
- */
-#define LDLM_POOL_SLV_SHIFT (10)
-
-static inline __u64 dru(__u64 val, __u32 shift, int round_up)
-{
- return (val + (round_up ? (1 << shift) - 1 : 0)) >> shift;
-}
-
-static inline __u64 ldlm_pool_slv_max(__u32 L)
-{
- /*
- * Allow a single client to hold all locks for 10 hrs.
- * The formula is the following: limit * 10h / 1 client.
- */
- __u64 lim = (__u64)L * LDLM_POOL_MAX_AGE / 1;
- return lim;
-}
-
-static inline __u64 ldlm_pool_slv_min(__u32 L)
-{
- return 1;
-}
-
-enum {
- LDLM_POOL_FIRST_STAT = 0,
- LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT,
- LDLM_POOL_GRANT_STAT,
- LDLM_POOL_CANCEL_STAT,
- LDLM_POOL_GRANT_RATE_STAT,
- LDLM_POOL_CANCEL_RATE_STAT,
- LDLM_POOL_GRANT_PLAN_STAT,
- LDLM_POOL_SLV_STAT,
- LDLM_POOL_SHRINK_REQTD_STAT,
- LDLM_POOL_SHRINK_FREED_STAT,
- LDLM_POOL_RECALC_STAT,
- LDLM_POOL_TIMING_STAT,
- LDLM_POOL_LAST_STAT
-};
-
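/*
 * Editor's note (not part of the original file): the grant-step helper
 * defined just below computes
 *
 *   gsp(t) = LDLM_POOL_MAX_GSP -
 *            ((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >>
 *             (t >> LDLM_POOL_GSP_STEP_SHIFT))
 *
 * Working it through with the constants above confirms the two data points
 * quoted in its comment:
 *
 *   t = 1:  30 - (29 >> (1 >> 2))  = 30 - (29 >> 0) = 30 - 29 = 1%
 *   t = 10: 30 - (29 >> (10 >> 2)) = 30 - (29 >> 2) = 30 - 7  = 23%
 */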
-/**
- * Calculates suggested grant_step in % of available locks for passed
- * \a period. This is later used in grant_plan calculations.
- */
-static inline int ldlm_pool_t2gsp(unsigned int t)
-{
-	/*
-	 * This yields a 1% grant step for any period shorter than
-	 * 2^LDLM_POOL_GSP_STEP_SHIFT seconds, approaching 30% as the period
-	 * grows.
-	 *
-	 * How this will affect execution is the following:
-	 *
-	 * - for a thread period of 1s we will have a grant_step of 1%, which
-	 * is good from the point of view of taking some load off the server
-	 * and pushing it out to clients. This is so because a 1% grant_step
-	 * means that the server will not allow clients to get lots of locks
-	 * in a short period of time while keeping all their old locks in
-	 * their caches. Clients will always have to give some locks back if
-	 * they want to take new ones;
-	 *
-	 * - for a thread period of 10s (which is the default) we will have
-	 * 23%, which means that clients will have enough room to take some
-	 * new locks without giving some back. All locks from this 23% that
-	 * were not taken by clients in the current period will contribute to
-	 * SLV growth. SLV growth means more locks get cached on clients,
-	 * until the limit or the grant plan is reached.
-	 */
-	return LDLM_POOL_MAX_GSP -
-		((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >>
-		 (t >> LDLM_POOL_GSP_STEP_SHIFT));
-}
-
-/**
- * Recalculates next stats on passed \a pl.
- *
- * \pre ->pl_lock is locked.
- */
-static void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
-{
-	int grant_plan = pl->pl_grant_plan;
-	__u64 slv = pl->pl_server_lock_volume;
-	int granted = atomic_read(&pl->pl_granted);
-	int grant_rate = atomic_read(&pl->pl_grant_rate);
-	int cancel_rate = atomic_read(&pl->pl_cancel_rate);
-
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT,
-			    slv);
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
-			    granted);
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
-			    grant_rate);
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT,
-			    grant_plan);
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT,
-			    cancel_rate);
-}
-
-/**
- * Sets SLV and Limit from container_of(pl, struct ldlm_namespace,
- * ns_pool)->ns_obd to passed \a pl.
- */
-static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
-{
-	struct obd_device *obd;
-
-	/*
-	 * Get new SLV and Limit from obd which is updated with coming
-	 * RPCs.
-	 */
-	obd = container_of(pl, struct ldlm_namespace,
-			   ns_pool)->ns_obd;
-	read_lock(&obd->obd_pool_lock);
-	pl->pl_server_lock_volume = obd->obd_pool_slv;
-	atomic_set(&pl->pl_limit, obd->obd_pool_limit);
-	read_unlock(&obd->obd_pool_lock);
-}
-
-/**
- * Recalculates client side pool \a pl according to current SLV and Limit.
- */
-static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
-{
-	time64_t recalc_interval_sec;
-	int ret;
-
-	recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec < pl->pl_recalc_period)
-		return 0;
-
-	spin_lock(&pl->pl_lock);
-	/*
-	 * Check if we need to recalc lists now.
-	 */
-	recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec < pl->pl_recalc_period) {
-		spin_unlock(&pl->pl_lock);
-		return 0;
-	}
-
-	/*
-	 * Make sure that pool knows last SLV and Limit from obd.
-	 */
-	ldlm_cli_pool_pop_slv(pl);
-
-	spin_unlock(&pl->pl_lock);
-
-	/*
-	 * Do not cancel locks in case lru resize is disabled for this ns.
-	 */
-	if (!ns_connect_lru_resize(container_of(pl, struct ldlm_namespace,
-						ns_pool))) {
-		ret = 0;
-		goto out;
-	}
-
-	/*
-	 * When canceling locks on the client we do not need to maintain
-	 * sharp timing; we only want to cancel locks as soon as possible
-	 * according to the new SLV. This may be called when the SLV has
-	 * changed a lot, which is why we do not take pl->pl_recalc_time
-	 * into account here.
-	 */
-	ret = ldlm_cancel_lru(container_of(pl, struct ldlm_namespace, ns_pool),
-			      0, LCF_ASYNC, LDLM_LRU_FLAG_LRUR);
-
-out:
-	spin_lock(&pl->pl_lock);
-	/*
-	 * Time of LRU resizing might be longer than period,
-	 * so update after LRU resizing rather than before it.
-	 */
-	pl->pl_recalc_time = ktime_get_real_seconds();
-	lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
-			    recalc_interval_sec);
-	spin_unlock(&pl->pl_lock);
-	return ret;
-}
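-/*
- * A minimal sketch (for illustration; names as in this file) of the loop
- * that is expected to drive ->po_recalc: ldlm_pool_recalc() below returns
- * the number of seconds until the next period, so a periodic worker can
- * simply reschedule itself with that delay:
- *
- *	int delay = ldlm_pool_recalc(&ns->ns_pool);
- *
- *	schedule_delayed_work(&ldlm_recalc_pools, delay * HZ);
- *
- * ldlm_pools_recalc() at the bottom of this file does exactly that across
- * all client namespaces.
- */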
-/**
- * This function is the main entry point for memory pressure handling on the
- * client side. Its main goal is to cancel some number of locks on the passed
- * \a pl according to \a nr and \a gfp_mask.
- */
-static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
-				int nr, gfp_t gfp_mask)
-{
-	struct ldlm_namespace *ns;
-	int unused;
-
-	ns = container_of(pl, struct ldlm_namespace, ns_pool);
-
-	/*
-	 * Do not cancel locks in case lru resize is disabled for this ns.
-	 */
-	if (!ns_connect_lru_resize(ns))
-		return 0;
-
-	/*
-	 * Make sure that pool knows last SLV and Limit from obd.
-	 */
-	ldlm_cli_pool_pop_slv(pl);
-
-	spin_lock(&ns->ns_lock);
-	unused = ns->ns_nr_unused;
-	spin_unlock(&ns->ns_lock);
-
-	if (nr == 0)
-		return (unused / 100) * sysctl_vfs_cache_pressure;
-	else
-		return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_LRU_FLAG_SHRINK);
-}
-
-static const struct ldlm_pool_ops ldlm_cli_pool_ops = {
-	.po_recalc = ldlm_cli_pool_recalc,
-	.po_shrink = ldlm_cli_pool_shrink
-};
-
-/**
- * Pool recalc wrapper. Will call either the client or the server pool recalc
- * callback, depending on what kind of pool \a pl is.
- */
-static int ldlm_pool_recalc(struct ldlm_pool *pl)
-{
-	s32 recalc_interval_sec;
-	int count;
-
-	recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec > 0) {
-		spin_lock(&pl->pl_lock);
-		recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time;
-
-		if (recalc_interval_sec > 0) {
-			/*
-			 * Update pool statistics every 1s.
-			 */
-			ldlm_pool_recalc_stats(pl);
-
-			/*
-			 * Zero out all rates and speed for the last period.
-			 */
-			atomic_set(&pl->pl_grant_rate, 0);
-			atomic_set(&pl->pl_cancel_rate, 0);
-		}
-		spin_unlock(&pl->pl_lock);
-	}
-
-	if (pl->pl_ops->po_recalc) {
-		count = pl->pl_ops->po_recalc(pl);
-		lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
-				    count);
-	}
-
-	recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() +
-			      pl->pl_recalc_period;
-	if (recalc_interval_sec <= 0) {
-		/* DEBUG: should be re-removed after LU-4536 is fixed */
-		CDEBUG(D_DLMTRACE,
-		       "%s: Negative interval(%ld), too short period(%ld)\n",
-		       pl->pl_name, (long)recalc_interval_sec,
-		       (long)pl->pl_recalc_period);
-
-		/* Prevent too frequent recalculation. */
-		recalc_interval_sec = 1;
-	}
-
-	return recalc_interval_sec;
-}
-
-/*
- * Pool shrink wrapper. Will call either the client or the server pool shrink
- * callback, depending on what kind of pool \a pl is. When nr == 0, just
- * return the number of freeable locks. Otherwise, return the number of
- * canceled locks.
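- *
- * This is what the core VM shrinker ends up calling: .count_objects
- * reaches here with nr == 0 and .scan_objects with sc->nr_to_scan (see
- * ldlm_pools_cli_count() and ldlm_pools_cli_scan() below).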
- */ -static int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask) -{ - int cancel = 0; - - if (pl->pl_ops->po_shrink) { - cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask); - if (nr > 0) { - lprocfs_counter_add(pl->pl_stats, - LDLM_POOL_SHRINK_REQTD_STAT, - nr); - lprocfs_counter_add(pl->pl_stats, - LDLM_POOL_SHRINK_FREED_STAT, - cancel); - CDEBUG(D_DLMTRACE, - "%s: request to shrink %d locks, shrunk %d\n", - pl->pl_name, nr, cancel); - } - } - return cancel; -} - -static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused) -{ - int granted, grant_rate, cancel_rate; - int grant_speed, lvf; - struct ldlm_pool *pl = m->private; - __u64 slv, clv; - __u32 limit; - - spin_lock(&pl->pl_lock); - slv = pl->pl_server_lock_volume; - clv = pl->pl_client_lock_volume; - limit = atomic_read(&pl->pl_limit); - granted = atomic_read(&pl->pl_granted); - grant_rate = atomic_read(&pl->pl_grant_rate); - cancel_rate = atomic_read(&pl->pl_cancel_rate); - grant_speed = grant_rate - cancel_rate; - lvf = atomic_read(&pl->pl_lock_volume_factor); - spin_unlock(&pl->pl_lock); - - seq_printf(m, "LDLM pool state (%s):\n" - " SLV: %llu\n" - " CLV: %llu\n" - " LVF: %d\n", - pl->pl_name, slv, clv, lvf); - - seq_printf(m, " GR: %d\n CR: %d\n GS: %d\n" - " G: %d\n L: %d\n", - grant_rate, cancel_rate, grant_speed, - granted, limit); - - return 0; -} - -LPROC_SEQ_FOPS_RO(lprocfs_pool_state); - -static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, - pl_kobj); - - int grant_speed; - - spin_lock(&pl->pl_lock); - /* serialize with ldlm_pool_recalc */ - grant_speed = atomic_read(&pl->pl_grant_rate) - - atomic_read(&pl->pl_cancel_rate); - spin_unlock(&pl->pl_lock); - return sprintf(buf, "%d\n", grant_speed); -} -LUSTRE_RO_ATTR(grant_speed); - -LDLM_POOL_SYSFS_READER_SHOW(grant_plan, int); -LUSTRE_RO_ATTR(grant_plan); - -LDLM_POOL_SYSFS_READER_SHOW(recalc_period, int); -LDLM_POOL_SYSFS_WRITER_STORE(recalc_period, int); -LUSTRE_RW_ATTR(recalc_period); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64); -LUSTRE_RO_ATTR(server_lock_volume); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic); -LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic); -LUSTRE_RW_ATTR(limit); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(granted, atomic); -LUSTRE_RO_ATTR(granted); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(cancel_rate, atomic); -LUSTRE_RO_ATTR(cancel_rate); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic); -LUSTRE_RO_ATTR(grant_rate); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(lock_volume_factor, atomic); -LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(lock_volume_factor, atomic); -LUSTRE_RW_ATTR(lock_volume_factor); - -#define LDLM_POOL_ADD_VAR(name, var, ops) \ - do { \ - snprintf(var_name, MAX_STRING_SIZE, #name); \ - pool_vars[0].data = var; \ - pool_vars[0].fops = ops; \ - ldebugfs_add_vars(pl->pl_debugfs_entry, pool_vars, NULL);\ - } while (0) - -/* These are for pools in /sys/fs/lustre/ldlm/namespaces/.../pool */ -static struct attribute *ldlm_pl_attrs[] = { - &lustre_attr_grant_speed.attr, - &lustre_attr_grant_plan.attr, - &lustre_attr_recalc_period.attr, - &lustre_attr_server_lock_volume.attr, - &lustre_attr_limit.attr, - &lustre_attr_granted.attr, - &lustre_attr_cancel_rate.attr, - &lustre_attr_grant_rate.attr, - &lustre_attr_lock_volume_factor.attr, - NULL, -}; - -static void ldlm_pl_release(struct kobject *kobj) -{ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, - pl_kobj); - 
complete(&pl->pl_kobj_unregister); -} - -static struct kobj_type ldlm_pl_ktype = { - .default_attrs = ldlm_pl_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = ldlm_pl_release, -}; - -static int ldlm_pool_sysfs_init(struct ldlm_pool *pl) -{ - struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace, - ns_pool); - int err; - - init_completion(&pl->pl_kobj_unregister); - err = kobject_init_and_add(&pl->pl_kobj, &ldlm_pl_ktype, &ns->ns_kobj, - "pool"); - - return err; -} - -static int ldlm_pool_debugfs_init(struct ldlm_pool *pl) -{ - struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace, - ns_pool); - struct dentry *debugfs_ns_parent; - struct lprocfs_vars pool_vars[2]; - char *var_name = NULL; - int rc = 0; - - var_name = kzalloc(MAX_STRING_SIZE + 1, GFP_NOFS); - if (!var_name) - return -ENOMEM; - - debugfs_ns_parent = ns->ns_debugfs_entry; - if (IS_ERR_OR_NULL(debugfs_ns_parent)) { - CERROR("%s: debugfs entry is not initialized\n", - ldlm_ns_name(ns)); - rc = -EINVAL; - goto out_free_name; - } - pl->pl_debugfs_entry = ldebugfs_register("pool", debugfs_ns_parent, - NULL, NULL); - if (IS_ERR(pl->pl_debugfs_entry)) { - CERROR("LdebugFS failed in ldlm-pool-init\n"); - rc = PTR_ERR(pl->pl_debugfs_entry); - pl->pl_debugfs_entry = NULL; - goto out_free_name; - } - - var_name[MAX_STRING_SIZE] = '\0'; - memset(pool_vars, 0, sizeof(pool_vars)); - pool_vars[0].name = var_name; - - LDLM_POOL_ADD_VAR(state, pl, &lprocfs_pool_state_fops); - - pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT - - LDLM_POOL_FIRST_STAT, 0); - if (!pl->pl_stats) { - rc = -ENOMEM; - goto out_free_name; - } - - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "granted", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "grant", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "cancel", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "grant_rate", "locks/s"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "cancel_rate", "locks/s"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "grant_plan", "locks/s"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "slv", "slv"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "shrink_request", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "shrink_freed", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "recalc_freed", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "recalc_timing", "sec"); - rc = ldebugfs_register_stats(pl->pl_debugfs_entry, "stats", - pl->pl_stats); - -out_free_name: - kfree(var_name); - return rc; -} - -static void ldlm_pool_sysfs_fini(struct ldlm_pool *pl) -{ - kobject_put(&pl->pl_kobj); - wait_for_completion(&pl->pl_kobj_unregister); -} - -static void ldlm_pool_debugfs_fini(struct ldlm_pool *pl) -{ - if (pl->pl_stats) { - lprocfs_free_stats(&pl->pl_stats); - pl->pl_stats = NULL; - } - if 
(pl->pl_debugfs_entry) { - ldebugfs_remove(&pl->pl_debugfs_entry); - pl->pl_debugfs_entry = NULL; - } -} - -int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, enum ldlm_side client) -{ - int rc; - - spin_lock_init(&pl->pl_lock); - atomic_set(&pl->pl_granted, 0); - pl->pl_recalc_time = ktime_get_real_seconds(); - atomic_set(&pl->pl_lock_volume_factor, 1); - - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L); - - snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d", - ldlm_ns_name(ns), idx); - - atomic_set(&pl->pl_limit, 1); - pl->pl_server_lock_volume = 0; - pl->pl_ops = &ldlm_cli_pool_ops; - pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD; - pl->pl_client_lock_volume = 0; - rc = ldlm_pool_debugfs_init(pl); - if (rc) - return rc; - - rc = ldlm_pool_sysfs_init(pl); - if (rc) - return rc; - - CDEBUG(D_DLMTRACE, "Lock pool %s is initialized\n", pl->pl_name); - - return rc; -} - -void ldlm_pool_fini(struct ldlm_pool *pl) -{ - ldlm_pool_sysfs_fini(pl); - ldlm_pool_debugfs_fini(pl); - - /* - * Pool should not be used after this point. We can't free it here as - * it lives in struct ldlm_namespace, but still interested in catching - * any abnormal using cases. - */ - POISON(pl, 0x5a, sizeof(*pl)); -} - -/** - * Add new taken ldlm lock \a lock into pool \a pl accounting. - */ -void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) -{ - /* - * FLOCK locks are special in a sense that they are almost never - * cancelled, instead special kind of lock is used to drop them. - * also there is no LRU for flock locks, so no point in tracking - * them anyway. - */ - if (lock->l_resource->lr_type == LDLM_FLOCK) - return; - - atomic_inc(&pl->pl_granted); - atomic_inc(&pl->pl_grant_rate); - lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT); - /* - * Do not do pool recalc for client side as all locks which - * potentially may be canceled has already been packed into - * enqueue/cancel rpc. Also we do not want to run out of stack - * with too long call paths. - */ -} - -/** - * Remove ldlm lock \a lock from pool \a pl accounting. - */ -void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) -{ - /* - * Filter out FLOCK locks. Read above comment in ldlm_pool_add(). - */ - if (lock->l_resource->lr_type == LDLM_FLOCK) - return; - - LASSERT(atomic_read(&pl->pl_granted) > 0); - atomic_dec(&pl->pl_granted); - atomic_inc(&pl->pl_cancel_rate); - - lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT); -} - -/** - * Returns current \a pl SLV. - * - * \pre ->pl_lock is not locked. - */ -__u64 ldlm_pool_get_slv(struct ldlm_pool *pl) -{ - __u64 slv; - - spin_lock(&pl->pl_lock); - slv = pl->pl_server_lock_volume; - spin_unlock(&pl->pl_lock); - return slv; -} - -/** - * Sets passed \a clv to \a pl. - * - * \pre ->pl_lock is not locked. - */ -void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv) -{ - spin_lock(&pl->pl_lock); - pl->pl_client_lock_volume = clv; - spin_unlock(&pl->pl_lock); -} - -/** - * Returns current LVF from \a pl. - */ -__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl) -{ - return atomic_read(&pl->pl_lock_volume_factor); -} - -static int ldlm_pool_granted(struct ldlm_pool *pl) -{ - return atomic_read(&pl->pl_granted); -} - -/* - * count locks from all namespaces (if possible). Returns number of - * cached locks. 
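- * For a client namespace this is, per the nr == 0 branch of
- * ldlm_cli_pool_shrink() above, roughly 1% of that namespace's unused
- * locks scaled by sysctl_vfs_cache_pressure.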
- */
-static unsigned long ldlm_pools_count(enum ldlm_side client, gfp_t gfp_mask)
-{
-	unsigned long total = 0;
-	int nr_ns;
-	struct ldlm_namespace *ns;
-	struct ldlm_namespace *ns_old = NULL; /* loop detection */
-
-	if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
-		return 0;
-
-	CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
-	       client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
-
-	/*
-	 * Find out how many resources we may release.
-	 */
-	for (nr_ns = ldlm_namespace_nr_read(client);
-	     nr_ns > 0; nr_ns--) {
-		mutex_lock(ldlm_namespace_lock(client));
-		if (list_empty(ldlm_namespace_list(client))) {
-			mutex_unlock(ldlm_namespace_lock(client));
-			return 0;
-		}
-		ns = ldlm_namespace_first_locked(client);
-
-		if (ns == ns_old) {
-			mutex_unlock(ldlm_namespace_lock(client));
-			break;
-		}
-
-		if (ldlm_ns_empty(ns)) {
-			ldlm_namespace_move_to_inactive_locked(ns, client);
-			mutex_unlock(ldlm_namespace_lock(client));
-			continue;
-		}
-
-		if (!ns_old)
-			ns_old = ns;
-
-		ldlm_namespace_get(ns);
-		ldlm_namespace_move_to_active_locked(ns, client);
-		mutex_unlock(ldlm_namespace_lock(client));
-		total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
-		ldlm_namespace_put(ns);
-	}
-
-	return total;
-}
-
-static unsigned long ldlm_pools_scan(enum ldlm_side client, int nr,
-				     gfp_t gfp_mask)
-{
-	unsigned long freed = 0;
-	int tmp, nr_ns;
-	struct ldlm_namespace *ns;
-
-	if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
-		return -1;
-
-	/*
-	 * Shrink at least ldlm_namespace_nr_read(client) namespaces.
-	 */
-	for (tmp = nr_ns = ldlm_namespace_nr_read(client);
-	     tmp > 0; tmp--) {
-		int cancel, nr_locks;
-
-		/*
-		 * Do not call shrink under ldlm_namespace_lock(client).
-		 */
-		mutex_lock(ldlm_namespace_lock(client));
-		if (list_empty(ldlm_namespace_list(client))) {
-			mutex_unlock(ldlm_namespace_lock(client));
-			break;
-		}
-		ns = ldlm_namespace_first_locked(client);
-		ldlm_namespace_get(ns);
-		ldlm_namespace_move_to_active_locked(ns, client);
-		mutex_unlock(ldlm_namespace_lock(client));
-
-		nr_locks = ldlm_pool_granted(&ns->ns_pool);
-		/*
-		 * We used to shrink proportionally, but with the new shrinker
-		 * API we no longer know the total number of freeable locks.
-		 */
-		cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
-		freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
-		ldlm_namespace_put(ns);
-	}
-	/*
-	 * We only decrease the SLV in the server pools shrinker; return
-	 * SHRINK_STOP to the kernel to avoid a needless loop. LU-1128.
-	 */
-	return freed;
-}
-
-static unsigned long ldlm_pools_cli_count(struct shrinker *s,
-					  struct shrink_control *sc)
-{
-	return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
-}
-
-static unsigned long ldlm_pools_cli_scan(struct shrinker *s,
-					 struct shrink_control *sc)
-{
-	return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
-			       sc->gfp_mask);
-}
-
-static void ldlm_pools_recalc(struct work_struct *ws);
-static DECLARE_DELAYED_WORK(ldlm_recalc_pools, ldlm_pools_recalc);
-
-static void ldlm_pools_recalc(struct work_struct *ws)
-{
-	enum ldlm_side client = LDLM_NAMESPACE_CLIENT;
-	struct ldlm_namespace *ns;
-	struct ldlm_namespace *ns_old = NULL;
-	/* seconds of sleep if no active namespaces */
-	int time = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
-	int nr;
-
-	/*
-	 * Recalc at least ldlm_namespace_nr_read(client) namespaces.
-	 */
-	for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
-		int skip;
-		/*
-		 * Lock the list, get first @ns in the list, getref, move it
-		 * to the tail, unlock and call pool recalc. 
This way we avoid - * calling recalc under @ns lock what is really good as we get - * rid of potential deadlock on client nodes when canceling - * locks synchronously. - */ - mutex_lock(ldlm_namespace_lock(client)); - if (list_empty(ldlm_namespace_list(client))) { - mutex_unlock(ldlm_namespace_lock(client)); - break; - } - ns = ldlm_namespace_first_locked(client); - - if (ns_old == ns) { /* Full pass complete */ - mutex_unlock(ldlm_namespace_lock(client)); - break; - } - - /* We got an empty namespace, need to move it back to inactive - * list. - * The race with parallel resource creation is fine: - * - If they do namespace_get before our check, we fail the - * check and they move this item to the end of the list anyway - * - If we do the check and then they do namespace_get, then - * we move the namespace to inactive and they will move - * it back to active (synchronised by the lock, so no clash - * there). - */ - if (ldlm_ns_empty(ns)) { - ldlm_namespace_move_to_inactive_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - continue; - } - - if (!ns_old) - ns_old = ns; - - spin_lock(&ns->ns_lock); - /* - * skip ns which is being freed, and we don't want to increase - * its refcount again, not even temporarily. bz21519 & LU-499. - */ - if (ns->ns_stopping) { - skip = 1; - } else { - skip = 0; - ldlm_namespace_get(ns); - } - spin_unlock(&ns->ns_lock); - - ldlm_namespace_move_to_active_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - - /* - * After setup is done - recalc the pool. - */ - if (!skip) { - int ttime = ldlm_pool_recalc(&ns->ns_pool); - - if (ttime < time) - time = ttime; - - ldlm_namespace_put(ns); - } - } - - /* Wake up the blocking threads from time to time. */ - ldlm_bl_thread_wakeup(); - - schedule_delayed_work(&ldlm_recalc_pools, time * HZ); -} - -static int ldlm_pools_thread_start(void) -{ - schedule_delayed_work(&ldlm_recalc_pools, 0); - - return 0; -} - -static void ldlm_pools_thread_stop(void) -{ - cancel_delayed_work_sync(&ldlm_recalc_pools); -} - -static struct shrinker ldlm_pools_cli_shrinker = { - .count_objects = ldlm_pools_cli_count, - .scan_objects = ldlm_pools_cli_scan, - .seeks = DEFAULT_SEEKS, -}; - -int ldlm_pools_init(void) -{ - int rc; - - rc = ldlm_pools_thread_start(); - if (!rc) - rc = register_shrinker(&ldlm_pools_cli_shrinker); - - return rc; -} - -void ldlm_pools_fini(void) -{ - unregister_shrinker(&ldlm_pools_cli_shrinker); - - ldlm_pools_thread_stop(); -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c deleted file mode 100644 index c3c9186b74ce..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ /dev/null @@ -1,2080 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** - * This file contains Asynchronous System Trap (AST) handlers and related - * LDLM request-processing routines. - * - * An AST is a callback issued on a lock when its state is changed. There are - * several different types of ASTs (callbacks) registered for each lock: - * - * - completion AST: when a lock is enqueued by some process, but cannot be - * granted immediately due to other conflicting locks on the same resource, - * the completion AST is sent to notify the caller when the lock is - * eventually granted - * - * - blocking AST: when a lock is granted to some process, if another process - * enqueues a conflicting (blocking) lock on a resource, a blocking AST is - * sent to notify the holder(s) of the lock(s) of the conflicting lock - * request. The lock holder(s) must release their lock(s) on that resource in - * a timely manner or be evicted by the server. - * - * - glimpse AST: this is used when a process wants information about a lock - * (i.e. the lock value block (LVB)) but does not necessarily require holding - * the lock. If the resource is locked, the lock holder(s) are sent glimpse - * ASTs and the LVB is returned to the caller, and lock holder(s) may CANCEL - * their lock(s) if they are idle. If the resource is not locked, the server - * may grant the lock. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <lustre_errno.h> -#include <lustre_dlm.h> -#include <obd_class.h> -#include <obd.h> - -#include "ldlm_internal.h" - -unsigned int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT; -module_param(ldlm_enqueue_min, uint, 0644); -MODULE_PARM_DESC(ldlm_enqueue_min, "lock enqueue timeout minimum"); - -/* in client side, whether the cached locks will be canceled before replay */ -unsigned int ldlm_cancel_unused_locks_before_replay = 1; - -struct ldlm_async_args { - struct lustre_handle lock_handle; -}; - -/** - * ldlm_request_bufsize - * - * @count: number of ldlm handles - * @type: ldlm opcode - * - * If opcode=LDLM_ENQUEUE, 1 slot is already occupied, - * LDLM_LOCKREQ_HANDLE -1 slots are available. - * Otherwise, LDLM_LOCKREQ_HANDLE slots are available. 
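- *
- * For example (assuming the in-tree values LDLM_LOCKREQ_HANDLES == 2 and
- * LDLM_ENQUEUE_CANCEL_OFF == 1): for count == 10 handles in an LDLM_ENQUEUE
- * request only 1 slot is free, so the buffer grows to
- * sizeof(struct ldlm_request) + 9 * sizeof(struct lustre_handle).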
- * - * Return: size of the request buffer - */ -static int ldlm_request_bufsize(int count, int type) -{ - int avail = LDLM_LOCKREQ_HANDLES; - - if (type == LDLM_ENQUEUE) - avail -= LDLM_ENQUEUE_CANCEL_OFF; - - if (count > avail) - avail = (count - avail) * sizeof(struct lustre_handle); - else - avail = 0; - - return sizeof(struct ldlm_request) + avail; -} - -static void ldlm_expired_completion_wait(struct ldlm_lock *lock, __u32 conn_cnt) -{ - struct obd_import *imp; - struct obd_device *obd; - - if (!lock->l_conn_export) { - static unsigned long next_dump, last_dump; - - LDLM_ERROR(lock, - "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep", - (s64)lock->l_last_activity, - (s64)(ktime_get_real_seconds() - - lock->l_last_activity)); - if (cfs_time_after(cfs_time_current(), next_dump)) { - last_dump = next_dump; - next_dump = cfs_time_shift(300); - ldlm_namespace_dump(D_DLMTRACE, - ldlm_lock_to_ns(lock)); - if (last_dump == 0) - libcfs_debug_dumplog(); - } - return; - } - - obd = lock->l_conn_export->exp_obd; - imp = obd->u.cli.cl_import; - ptlrpc_fail_import(imp, conn_cnt); - LDLM_ERROR(lock, - "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s", - (s64)lock->l_last_activity, - (s64)(ktime_get_real_seconds() - lock->l_last_activity), - obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid); -} - -/** - * Calculate the Completion timeout (covering enqueue, BL AST, data flush, - * lock cancel, and their replies). Used for lock completion timeout on the - * client side. - * - * \param[in] lock lock which is waiting the completion callback - * - * \retval timeout in seconds to wait for the server reply - */ -/* We use the same basis for both server side and client side functions - * from a single node. - */ -static unsigned int ldlm_cp_timeout(struct ldlm_lock *lock) -{ - unsigned int timeout; - - if (AT_OFF) - return obd_timeout; - - /* - * Wait a long time for enqueue - server may have to callback a - * lock from another client. Server will evict the other client if it - * doesn't respond reasonably, and then give us the lock. - */ - timeout = at_get(ldlm_lock_to_ns_at(lock)); - return max(3 * timeout, ldlm_enqueue_min); -} - -/** - * Helper function for ldlm_completion_ast(), updating timings when lock is - * actually granted. - */ -static int ldlm_completion_tail(struct ldlm_lock *lock, void *data) -{ - long delay; - int result = 0; - - if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) { - LDLM_DEBUG(lock, "client-side enqueue: destroyed"); - result = -EIO; - } else if (!data) { - LDLM_DEBUG(lock, "client-side enqueue: granted"); - } else { - /* Take into AT only CP RPC, not immediately granted locks */ - delay = ktime_get_real_seconds() - lock->l_last_activity; - LDLM_DEBUG(lock, "client-side enqueue: granted after %lds", - delay); - - /* Update our time estimate */ - at_measured(ldlm_lock_to_ns_at(lock), delay); - } - return result; -} - -/** - * Implementation of ->l_completion_ast() for a client, that doesn't wait - * until lock is granted. Suitable for locks enqueued through ptlrpcd, of - * other threads that cannot block for long. 
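- *
- * A caller that must not sleep would typically hook it up through the
- * enqueue info it passes to ldlm_cli_enqueue(), along the lines of:
- *
- *	struct ldlm_enqueue_info einfo = {
- *		.ei_type = LDLM_IBITS,
- *		.ei_mode = LCK_CR,
- *		.ei_cb_cp = ldlm_completion_ast_async,
- *	};
- *
- * (illustrative values; any lock type and mode work the same way).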
- */ -int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data) -{ - if (flags == LDLM_FL_WAIT_NOREPROC) { - LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock"); - return 0; - } - - if (!(flags & LDLM_FL_BLOCKED_MASK)) { - wake_up(&lock->l_waitq); - return ldlm_completion_tail(lock, data); - } - - LDLM_DEBUG(lock, - "client-side enqueue returned a blocked lock, going forward"); - return 0; -} -EXPORT_SYMBOL(ldlm_completion_ast_async); - -/** - * Generic LDLM "completion" AST. This is called in several cases: - * - * - when a reply to an ENQUEUE RPC is received from the server - * (ldlm_cli_enqueue_fini()). Lock might be granted or not granted at - * this point (determined by flags); - * - * - when LDLM_CP_CALLBACK RPC comes to client to notify it that lock has - * been granted; - * - * - when ldlm_lock_match(LDLM_FL_LVB_READY) is about to wait until lock - * gets correct lvb; - * - * - to force all locks when resource is destroyed (cleanup_resource()); - * - * - during lock conversion (not used currently). - * - * If lock is not granted in the first case, this function waits until second - * or penultimate cases happen in some other thread. - * - */ -int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) -{ - /* XXX ALLOCATE - 160 bytes */ - struct obd_device *obd; - struct obd_import *imp = NULL; - __u32 timeout; - __u32 conn_cnt = 0; - int rc = 0; - - if (flags == LDLM_FL_WAIT_NOREPROC) { - LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock"); - goto noreproc; - } - - if (!(flags & LDLM_FL_BLOCKED_MASK)) { - wake_up(&lock->l_waitq); - return 0; - } - - LDLM_DEBUG(lock, - "client-side enqueue returned a blocked lock, sleeping"); - -noreproc: - - obd = class_exp2obd(lock->l_conn_export); - - /* if this is a local lock, then there is no import */ - if (obd) - imp = obd->u.cli.cl_import; - - timeout = ldlm_cp_timeout(lock); - - lock->l_last_activity = ktime_get_real_seconds(); - - if (imp) { - spin_lock(&imp->imp_lock); - conn_cnt = imp->imp_conn_cnt; - spin_unlock(&imp->imp_lock); - } - if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST, - OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) { - ldlm_set_fail_loc(lock); - rc = -EINTR; - } else { - /* Go to sleep until the lock is granted or canceled. */ - if (!ldlm_is_no_timeout(lock)) { - /* Wait uninterruptible for a while first */ - rc = wait_event_idle_timeout(lock->l_waitq, - is_granted_or_cancelled(lock), - timeout * HZ); - if (rc == 0) - ldlm_expired_completion_wait(lock, conn_cnt); - } - /* Now wait abortable */ - if (rc == 0) - rc = l_wait_event_abortable(lock->l_waitq, - is_granted_or_cancelled(lock)); - else - rc = 0; - } - - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - return rc; - } - - return ldlm_completion_tail(lock, data); -} -EXPORT_SYMBOL(ldlm_completion_ast); - -static void failed_lock_cleanup(struct ldlm_namespace *ns, - struct ldlm_lock *lock, int mode) -{ - int need_cancel = 0; - - /* Set a flag to prevent us from sending a CANCEL (bug 407) */ - lock_res_and_lock(lock); - /* Check that lock is not granted or failed, we might race. */ - if ((lock->l_req_mode != lock->l_granted_mode) && - !ldlm_is_failed(lock)) { - /* Make sure that this lock will not be found by raced - * bl_ast and -EINVAL reply is sent to server anyways. 
- * bug 17645
- */
-		lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-				 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
-		need_cancel = 1;
-	}
-	unlock_res_and_lock(lock);
-
-	if (need_cancel)
-		LDLM_DEBUG(lock,
-			   "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
-	else
-		LDLM_DEBUG(lock, "lock was granted or failed in race");
-
-	/* XXX - HACK because we shouldn't call ldlm_lock_destroy()
-	 *       from llite/file.c/ll_file_flock().
-	 */
-	/* This code accounts for the fact that we do not have a blocking
-	 * handler on the client for flock locks. As such, this is the place
-	 * where we must completely kill failed locks (both those that were
-	 * interrupted and those that were waiting to be granted when the
-	 * server evicted us).
-	 */
-	if (lock->l_resource->lr_type == LDLM_FLOCK) {
-		lock_res_and_lock(lock);
-		if (!ldlm_is_destroyed(lock)) {
-			ldlm_resource_unlink_lock(lock);
-			ldlm_lock_decref_internal_nolock(lock, mode);
-			ldlm_lock_destroy_nolock(lock);
-		}
-		unlock_res_and_lock(lock);
-	} else {
-		ldlm_lock_decref_internal(lock, mode);
-	}
-}
-
-/**
- * Finishing portion of client lock enqueue code.
- *
- * Called after receiving reply from server.
- */
-int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
-			  enum ldlm_type type, __u8 with_policy,
-			  enum ldlm_mode mode,
-			  __u64 *flags, void *lvb, __u32 lvb_len,
-			  const struct lustre_handle *lockh, int rc)
-{
-	struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
-	int is_replay = *flags & LDLM_FL_REPLAY;
-	struct ldlm_lock *lock;
-	struct ldlm_reply *reply;
-	int cleanup_phase = 1;
-
-	lock = ldlm_handle2lock(lockh);
-	/* ldlm_cli_enqueue is holding a reference on this lock. */
-	if (!lock) {
-		LASSERT(type == LDLM_FLOCK);
-		return -ENOLCK;
-	}
-
-	LASSERTF(ergo(lvb_len != 0, lvb_len == lock->l_lvb_len),
-		 "lvb_len = %d, l_lvb_len = %d\n", lvb_len, lock->l_lvb_len);
-
-	if (rc != ELDLM_OK) {
-		LASSERT(!is_replay);
-		LDLM_DEBUG(lock, "client-side enqueue END (%s)",
-			   rc == ELDLM_LOCK_ABORTED ? 
"ABORTED" : "FAILED"); - - if (rc != ELDLM_LOCK_ABORTED) - goto cleanup; - } - - /* Before we return, swab the reply */ - reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - if (!reply) { - rc = -EPROTO; - goto cleanup; - } - - if (lvb_len > 0) { - int size = 0; - - size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, - RCL_SERVER); - if (size < 0) { - LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", size); - rc = size; - goto cleanup; - } else if (unlikely(size > lvb_len)) { - LDLM_ERROR(lock, - "Replied LVB is larger than expectation, expected = %d, replied = %d", - lvb_len, size); - rc = -EINVAL; - goto cleanup; - } - lvb_len = size; - } - - if (rc == ELDLM_LOCK_ABORTED) { - if (lvb_len > 0 && lvb) - rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lvb, lvb_len); - if (rc == 0) - rc = ELDLM_LOCK_ABORTED; - goto cleanup; - } - - /* lock enqueued on the server */ - cleanup_phase = 0; - - lock_res_and_lock(lock); - /* Key change rehash lock in per-export hash with new key */ - if (exp->exp_lock_hash) { - /* In the function below, .hs_keycmp resolves to - * ldlm_export_lock_keycmp() - */ - /* coverity[overrun-buffer-val] */ - cfs_hash_rehash_key(exp->exp_lock_hash, - &lock->l_remote_handle, - &reply->lock_handle, - &lock->l_exp_hash); - } else { - lock->l_remote_handle = reply->lock_handle; - } - - *flags = ldlm_flags_from_wire(reply->lock_flags); - lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags & - LDLM_FL_INHERIT_MASK); - unlock_res_and_lock(lock); - - CDEBUG(D_INFO, "local: %p, remote cookie: %#llx, flags: 0x%llx\n", - lock, reply->lock_handle.cookie, *flags); - - /* If enqueue returned a blocked lock but the completion handler has - * already run, then it fixed up the resource and we don't need to do it - * again. - */ - if ((*flags) & LDLM_FL_LOCK_CHANGED) { - int newmode = reply->lock_desc.l_req_mode; - - LASSERT(!is_replay); - if (newmode && newmode != lock->l_req_mode) { - LDLM_DEBUG(lock, "server returned different mode %s", - ldlm_lockname[newmode]); - lock->l_req_mode = newmode; - } - - if (!ldlm_res_eq(&reply->lock_desc.l_resource.lr_name, - &lock->l_resource->lr_name)) { - CDEBUG(D_INFO, - "remote intent success, locking " DLDLMRES " instead of " DLDLMRES "\n", - PLDLMRES(&reply->lock_desc.l_resource), - PLDLMRES(lock->l_resource)); - - rc = ldlm_lock_change_resource(ns, lock, - &reply->lock_desc.l_resource.lr_name); - if (rc || !lock->l_resource) { - rc = -ENOMEM; - goto cleanup; - } - LDLM_DEBUG(lock, "client-side enqueue, new resource"); - } - if (with_policy) - if (!(type == LDLM_IBITS && - !(exp_connect_flags(exp) & OBD_CONNECT_IBITS))) - /* We assume lock type cannot change on server*/ - ldlm_convert_policy_to_local(exp, - lock->l_resource->lr_type, - &reply->lock_desc.l_policy_data, - &lock->l_policy_data); - if (type != LDLM_PLAIN) - LDLM_DEBUG(lock, - "client-side enqueue, new policy data"); - } - - if ((*flags) & LDLM_FL_AST_SENT) { - lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "enqueue reply includes blocking AST"); - } - - /* If the lock has already been granted by a completion AST, don't - * clobber the LVB with an older one. - */ - if (lvb_len > 0) { - /* We must lock or a racing completion might update lvb without - * letting us know and we'll clobber the correct value. 
- * Cannot unlock after the check either, as that still leaves - * a tiny window for completion to get in - */ - lock_res_and_lock(lock); - if (lock->l_req_mode != lock->l_granted_mode) - rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lock->l_lvb_data, lvb_len); - unlock_res_and_lock(lock); - if (rc < 0) { - cleanup_phase = 1; - goto cleanup; - } - } - - if (!is_replay) { - rc = ldlm_lock_enqueue(ns, &lock, NULL, flags); - if (lock->l_completion_ast) { - int err = lock->l_completion_ast(lock, *flags, NULL); - - if (!rc) - rc = err; - if (rc) - cleanup_phase = 1; - } - } - - if (lvb_len > 0 && lvb) { - /* Copy the LVB here, and not earlier, because the completion - * AST (if any) can override what we got in the reply - */ - memcpy(lvb, lock->l_lvb_data, lvb_len); - } - - LDLM_DEBUG(lock, "client-side enqueue END"); -cleanup: - if (cleanup_phase == 1 && rc) - failed_lock_cleanup(ns, lock, mode); - /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */ - LDLM_LOCK_PUT(lock); - LDLM_LOCK_RELEASE(lock); - return rc; -} -EXPORT_SYMBOL(ldlm_cli_enqueue_fini); - -/** - * Estimate number of lock handles that would fit into request of given - * size. PAGE_SIZE-512 is to allow TCP/IP and LNET headers to fit into - * a single page on the send/receive side. XXX: 512 should be changed to - * more adequate value. - */ -static inline int ldlm_req_handles_avail(int req_size, int off) -{ - int avail; - - avail = min_t(int, LDLM_MAXREQSIZE, PAGE_SIZE - 512) - req_size; - if (likely(avail >= 0)) - avail /= (int)sizeof(struct lustre_handle); - else - avail = 0; - avail += LDLM_LOCKREQ_HANDLES - off; - - return avail; -} - -static inline int ldlm_capsule_handles_avail(struct req_capsule *pill, - enum req_location loc, - int off) -{ - u32 size = req_capsule_msg_size(pill, loc); - - return ldlm_req_handles_avail(size, off); -} - -static inline int ldlm_format_handles_avail(struct obd_import *imp, - const struct req_format *fmt, - enum req_location loc, int off) -{ - u32 size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc); - - return ldlm_req_handles_avail(size, off); -} - -/** - * Cancel LRU locks and pack them into the enqueue request. Pack there the given - * \a count locks in \a cancels. - * - * This is to be called by functions preparing their own requests that - * might contain lists of locks to cancel in addition to actual operation - * that needs to be performed. - */ -int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req, - int version, int opc, int canceloff, - struct list_head *cancels, int count) -{ - struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; - struct req_capsule *pill = &req->rq_pill; - struct ldlm_request *dlm = NULL; - int flags, avail, to_free, pack = 0; - LIST_HEAD(head); - int rc; - - if (!cancels) - cancels = &head; - if (ns_connect_cancelset(ns)) { - /* Estimate the amount of available space in the request. */ - req_capsule_filled_sizes(pill, RCL_CLIENT); - avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff); - - flags = ns_connect_lru_resize(ns) ? - LDLM_LRU_FLAG_LRUR_NO_WAIT : LDLM_LRU_FLAG_AGED; - to_free = !ns_connect_lru_resize(ns) && - opc == LDLM_ENQUEUE ? 1 : 0; - - /* Cancel LRU locks here _only_ if the server supports - * EARLY_CANCEL. Otherwise we have to send extra CANCEL - * RPC, which will make us slower. 
- */ - if (avail > count) - count += ldlm_cancel_lru_local(ns, cancels, to_free, - avail - count, 0, flags); - if (avail > count) - pack = count; - else - pack = avail; - req_capsule_set_size(pill, &RMF_DLM_REQ, RCL_CLIENT, - ldlm_request_bufsize(pack, opc)); - } - - rc = ptlrpc_request_pack(req, version, opc); - if (rc) { - ldlm_lock_list_put(cancels, l_bl_ast, count); - return rc; - } - - if (ns_connect_cancelset(ns)) { - if (canceloff) { - dlm = req_capsule_client_get(pill, &RMF_DLM_REQ); - LASSERT(dlm); - /* Skip first lock handler in ldlm_request_pack(), - * this method will increment @lock_count according - * to the lock handle amount actually written to - * the buffer. - */ - dlm->lock_count = canceloff; - } - /* Pack into the request @pack lock handles. */ - ldlm_cli_cancel_list(cancels, pack, req, 0); - /* Prepare and send separate cancel RPC for others. */ - ldlm_cli_cancel_list(cancels, count - pack, NULL, 0); - } else { - ldlm_lock_list_put(cancels, l_bl_ast, count); - } - return 0; -} -EXPORT_SYMBOL(ldlm_prep_elc_req); - -int ldlm_prep_enqueue_req(struct obd_export *exp, struct ptlrpc_request *req, - struct list_head *cancels, int count) -{ - return ldlm_prep_elc_req(exp, req, LUSTRE_DLM_VERSION, LDLM_ENQUEUE, - LDLM_ENQUEUE_CANCEL_OFF, cancels, count); -} -EXPORT_SYMBOL(ldlm_prep_enqueue_req); - -static struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, - int lvb_len) -{ - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE); - if (!req) - return ERR_PTR(-ENOMEM); - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len); - ptlrpc_request_set_replen(req); - return req; -} - -/** - * Client-side lock enqueue. - * - * If a request has some specific initialisation it is passed in \a reqp, - * otherwise it is created in ldlm_cli_enqueue. - * - * Supports sync and async requests, pass \a async flag accordingly. If a - * request was created in ldlm_cli_enqueue and it is the async request, - * pass it to the caller in \a reqp. - */ -int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ldlm_enqueue_info *einfo, - const struct ldlm_res_id *res_id, - union ldlm_policy_data const *policy, __u64 *flags, - void *lvb, __u32 lvb_len, enum lvb_type lvb_type, - struct lustre_handle *lockh, int async) -{ - struct ldlm_namespace *ns; - struct ldlm_lock *lock; - struct ldlm_request *body; - int is_replay = *flags & LDLM_FL_REPLAY; - int req_passed_in = 1; - int rc, err; - struct ptlrpc_request *req; - - ns = exp->exp_obd->obd_namespace; - - /* If we're replaying this lock, just check some invariants. - * If we're creating a new lock, get everything all setup nicely. 
- */ - if (is_replay) { - lock = ldlm_handle2lock_long(lockh, 0); - LASSERT(lock); - LDLM_DEBUG(lock, "client-side enqueue START"); - LASSERT(exp == lock->l_conn_export); - } else { - const struct ldlm_callback_suite cbs = { - .lcs_completion = einfo->ei_cb_cp, - .lcs_blocking = einfo->ei_cb_bl, - .lcs_glimpse = einfo->ei_cb_gl - }; - lock = ldlm_lock_create(ns, res_id, einfo->ei_type, - einfo->ei_mode, &cbs, einfo->ei_cbdata, - lvb_len, lvb_type); - if (IS_ERR(lock)) - return PTR_ERR(lock); - /* for the local lock, add the reference */ - ldlm_lock_addref_internal(lock, einfo->ei_mode); - ldlm_lock2handle(lock, lockh); - if (policy) - lock->l_policy_data = *policy; - - if (einfo->ei_type == LDLM_EXTENT) { - /* extent lock without policy is a bug */ - if (!policy) - LBUG(); - - lock->l_req_extent = policy->l_extent; - } - LDLM_DEBUG(lock, "client-side enqueue START, flags %llx", - *flags); - } - - lock->l_conn_export = exp; - lock->l_export = NULL; - lock->l_blocking_ast = einfo->ei_cb_bl; - lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL)); - lock->l_last_activity = ktime_get_real_seconds(); - - /* lock not sent to server yet */ - if (!reqp || !*reqp) { - req = ldlm_enqueue_pack(exp, lvb_len); - if (IS_ERR(req)) { - failed_lock_cleanup(ns, lock, einfo->ei_mode); - LDLM_LOCK_RELEASE(lock); - return PTR_ERR(req); - } - - req_passed_in = 0; - if (reqp) - *reqp = req; - } else { - int len; - - req = *reqp; - len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ, - RCL_CLIENT); - LASSERTF(len >= sizeof(*body), "buflen[%d] = %d, not %d\n", - DLM_LOCKREQ_OFF, len, (int)sizeof(*body)); - } - - /* Dump lock data into the request buffer */ - body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - ldlm_lock2desc(lock, &body->lock_desc); - body->lock_flags = ldlm_flags_to_wire(*flags); - body->lock_handle[0] = *lockh; - - if (async) { - LASSERT(reqp); - return 0; - } - - LDLM_DEBUG(lock, "sending request"); - - rc = ptlrpc_queue_wait(req); - - err = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, policy ? 1 : 0, - einfo->ei_mode, flags, lvb, lvb_len, - lockh, rc); - - /* If ldlm_cli_enqueue_fini did not find the lock, we need to free - * one reference that we took - */ - if (err == -ENOLCK) - LDLM_LOCK_RELEASE(lock); - else - rc = err; - - if (!req_passed_in && req) { - ptlrpc_req_finished(req); - if (reqp) - *reqp = NULL; - } - - return rc; -} -EXPORT_SYMBOL(ldlm_cli_enqueue); - -/** - * Cancel locks locally. - * Returns: - * \retval LDLM_FL_LOCAL_ONLY if there is no need for a CANCEL RPC to the server - * \retval LDLM_FL_CANCELING otherwise; - * \retval LDLM_FL_BL_AST if there is a need for a separate CANCEL RPC. - */ -static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock) -{ - __u64 rc = LDLM_FL_LOCAL_ONLY; - - if (lock->l_conn_export) { - bool local_only; - - LDLM_DEBUG(lock, "client-side cancel"); - /* Set this flag to prevent others from getting new references*/ - lock_res_and_lock(lock); - ldlm_set_cbpending(lock); - local_only = !!(lock->l_flags & - (LDLM_FL_LOCAL_ONLY | LDLM_FL_CANCEL_ON_BLOCK)); - ldlm_cancel_callback(lock); - rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING; - unlock_res_and_lock(lock); - - if (local_only) { - CDEBUG(D_DLMTRACE, - "not sending request (at caller's instruction)\n"); - rc = LDLM_FL_LOCAL_ONLY; - } - ldlm_lock_cancel(lock); - } else { - LDLM_ERROR(lock, "Trying to cancel local lock"); - LBUG(); - } - - return rc; -} - -/** - * Pack \a count locks in \a head into ldlm_request buffer of request \a req. 
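- *
- * Handles are appended starting at dlm->lock_handle[dlm->lock_count], so
- * with early cancel an enqueue request can carry its own handle in slot 0
- * and piggyback cancel handles after it (see ldlm_prep_elc_req() above).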
- */ -static void ldlm_cancel_pack(struct ptlrpc_request *req, - struct list_head *head, int count) -{ - struct ldlm_request *dlm; - struct ldlm_lock *lock; - int max, packed = 0; - - dlm = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - LASSERT(dlm); - - /* Check the room in the request buffer. */ - max = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT) - - sizeof(struct ldlm_request); - max /= sizeof(struct lustre_handle); - max += LDLM_LOCKREQ_HANDLES; - LASSERT(max >= dlm->lock_count + count); - - /* XXX: it would be better to pack lock handles grouped by resource. - * so that the server cancel would call filter_lvbo_update() less - * frequently. - */ - list_for_each_entry(lock, head, l_bl_ast) { - if (!count--) - break; - LASSERT(lock->l_conn_export); - /* Pack the lock handle to the given request buffer. */ - LDLM_DEBUG(lock, "packing"); - dlm->lock_handle[dlm->lock_count++] = lock->l_remote_handle; - packed++; - } - CDEBUG(D_DLMTRACE, "%d locks packed\n", packed); -} - -/** - * Prepare and send a batched cancel RPC. It will include \a count lock - * handles of locks given in \a cancels list. - */ -static int ldlm_cli_cancel_req(struct obd_export *exp, - struct list_head *cancels, - int count, enum ldlm_cancel_flags flags) -{ - struct ptlrpc_request *req = NULL; - struct obd_import *imp; - int free, sent = 0; - int rc = 0; - - LASSERT(exp); - LASSERT(count > 0); - - CFS_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL, cfs_fail_val); - - if (CFS_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_RACE)) - return count; - - free = ldlm_format_handles_avail(class_exp2cliimp(exp), - &RQF_LDLM_CANCEL, RCL_CLIENT, 0); - if (count > free) - count = free; - - while (1) { - imp = class_exp2cliimp(exp); - if (!imp || imp->imp_invalid) { - CDEBUG(D_DLMTRACE, - "skipping cancel on invalid import %p\n", imp); - return count; - } - - req = ptlrpc_request_alloc(imp, &RQF_LDLM_CANCEL); - if (!req) { - rc = -ENOMEM; - goto out; - } - - req_capsule_filled_sizes(&req->rq_pill, RCL_CLIENT); - req_capsule_set_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT, - ldlm_request_bufsize(count, LDLM_CANCEL)); - - rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CANCEL); - if (rc) { - ptlrpc_request_free(req); - goto out; - } - - req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL; - req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL; - ptlrpc_at_set_req_timeout(req); - - ldlm_cancel_pack(req, cancels, count); - - ptlrpc_request_set_replen(req); - if (flags & LCF_ASYNC) { - ptlrpcd_add_req(req); - sent = count; - goto out; - } - - rc = ptlrpc_queue_wait(req); - if (rc == LUSTRE_ESTALE) { - CDEBUG(D_DLMTRACE, - "client/server (nid %s) out of sync -- not fatal\n", - libcfs_nid2str(req->rq_import-> - imp_connection->c_peer.nid)); - rc = 0; - } else if (rc == -ETIMEDOUT && /* check there was no reconnect*/ - req->rq_import_generation == imp->imp_generation) { - ptlrpc_req_finished(req); - continue; - } else if (rc != ELDLM_OK) { - /* -ESHUTDOWN is common on umount */ - CDEBUG_LIMIT(rc == -ESHUTDOWN ? D_DLMTRACE : D_ERROR, - "Got rc %d from cancel RPC: canceling anyway\n", - rc); - break; - } - sent = count; - break; - } - - ptlrpc_req_finished(req); -out: - return sent ? sent : rc; -} - -static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp) -{ - return &imp->imp_obd->obd_namespace->ns_pool; -} - -/** - * Update client's OBD pool related fields with new SLV and Limit from \a req. 
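- *
- * Both values ride in every RPC reply (see lustre_msg_get_slv() and
- * lustre_msg_get_limit() below), so the client pool learns the server's
- * current load without any dedicated RPCs.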
- */ -int ldlm_cli_update_pool(struct ptlrpc_request *req) -{ - struct obd_device *obd; - __u64 new_slv; - __u32 new_limit; - - if (unlikely(!req->rq_import || !req->rq_import->imp_obd || - !imp_connect_lru_resize(req->rq_import))) { - /* - * Do nothing for corner cases. - */ - return 0; - } - - /* In some cases RPC may contain SLV and limit zeroed out. This - * is the case when server does not support LRU resize feature. - * This is also possible in some recovery cases when server-side - * reqs have no reference to the OBD export and thus access to - * server-side namespace is not possible. - */ - if (lustre_msg_get_slv(req->rq_repmsg) == 0 || - lustre_msg_get_limit(req->rq_repmsg) == 0) { - DEBUG_REQ(D_HA, req, - "Zero SLV or Limit found (SLV: %llu, Limit: %u)", - lustre_msg_get_slv(req->rq_repmsg), - lustre_msg_get_limit(req->rq_repmsg)); - return 0; - } - - new_limit = lustre_msg_get_limit(req->rq_repmsg); - new_slv = lustre_msg_get_slv(req->rq_repmsg); - obd = req->rq_import->imp_obd; - - /* Set new SLV and limit in OBD fields to make them accessible - * to the pool thread. We do not access obd_namespace and pool - * directly here as there is no reliable way to make sure that - * they are still alive at cleanup time. Evil races are possible - * which may cause Oops at that time. - */ - write_lock(&obd->obd_pool_lock); - obd->obd_pool_slv = new_slv; - obd->obd_pool_limit = new_limit; - write_unlock(&obd->obd_pool_lock); - - return 0; -} - -/** - * Client side lock cancel. - * - * Lock must not have any readers or writers by this time. - */ -int ldlm_cli_cancel(const struct lustre_handle *lockh, - enum ldlm_cancel_flags cancel_flags) -{ - struct obd_export *exp; - int avail, flags, count = 1; - __u64 rc = 0; - struct ldlm_namespace *ns; - struct ldlm_lock *lock; - LIST_HEAD(cancels); - - lock = ldlm_handle2lock_long(lockh, 0); - if (!lock) { - LDLM_DEBUG_NOLOCK("lock is already being destroyed"); - return 0; - } - - lock_res_and_lock(lock); - /* Lock is being canceled and the caller doesn't want to wait */ - if (ldlm_is_canceling(lock) && (cancel_flags & LCF_ASYNC)) { - unlock_res_and_lock(lock); - LDLM_LOCK_RELEASE(lock); - return 0; - } - - ldlm_set_canceling(lock); - unlock_res_and_lock(lock); - - rc = ldlm_cli_cancel_local(lock); - if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) { - LDLM_LOCK_RELEASE(lock); - return 0; - } - /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL - * RPC which goes to canceld portal, so we can cancel other LRU locks - * here and send them all as one LDLM_CANCEL RPC. - */ - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, &cancels); - - exp = lock->l_conn_export; - if (exp_connect_cancelset(exp)) { - avail = ldlm_format_handles_avail(class_exp2cliimp(exp), - &RQF_LDLM_CANCEL, - RCL_CLIENT, 0); - LASSERT(avail > 0); - - ns = ldlm_lock_to_ns(lock); - flags = ns_connect_lru_resize(ns) ? - LDLM_LRU_FLAG_LRUR : LDLM_LRU_FLAG_AGED; - count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1, - LCF_BL_AST, flags); - } - ldlm_cli_cancel_list(&cancels, count, NULL, cancel_flags); - return 0; -} -EXPORT_SYMBOL(ldlm_cli_cancel); - -/** - * Locally cancel up to \a count locks in list \a cancels. - * Return the number of cancelled locks. 
- */ -int ldlm_cli_cancel_list_local(struct list_head *cancels, int count, - enum ldlm_cancel_flags flags) -{ - LIST_HEAD(head); - struct ldlm_lock *lock, *next; - int left = 0, bl_ast = 0; - __u64 rc; - - left = count; - list_for_each_entry_safe(lock, next, cancels, l_bl_ast) { - if (left-- == 0) - break; - - if (flags & LCF_LOCAL) { - rc = LDLM_FL_LOCAL_ONLY; - ldlm_lock_cancel(lock); - } else { - rc = ldlm_cli_cancel_local(lock); - } - /* Until we have compound requests and can send LDLM_CANCEL - * requests batched with generic RPCs, we need to send cancels - * with the LDLM_FL_BL_AST flag in a separate RPC from - * the one being generated now. - */ - if (!(flags & LCF_BL_AST) && (rc == LDLM_FL_BL_AST)) { - LDLM_DEBUG(lock, "Cancel lock separately"); - list_del_init(&lock->l_bl_ast); - list_add(&lock->l_bl_ast, &head); - bl_ast++; - continue; - } - if (rc == LDLM_FL_LOCAL_ONLY) { - /* CANCEL RPC should not be sent to server. */ - list_del_init(&lock->l_bl_ast); - LDLM_LOCK_RELEASE(lock); - count--; - } - } - if (bl_ast > 0) { - count -= bl_ast; - ldlm_cli_cancel_list(&head, bl_ast, NULL, 0); - } - - return count; -} - -/** - * Cancel as many locks as possible w/o sending any RPCs (e.g. to write back - * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g. - * readahead requests, ...) - */ -static enum ldlm_policy_res -ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, - int unused, int added, int count) -{ - enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK; - - /* don't check added & count since we want to process all locks - * from unused list. - * It's fine to not take lock to access lock->l_resource since - * the lock has already been granted so it won't change. - */ - switch (lock->l_resource->lr_type) { - case LDLM_EXTENT: - case LDLM_IBITS: - if (ns->ns_cancel && ns->ns_cancel(lock) != 0) - break; - /* fall through */ - default: - result = LDLM_POLICY_SKIP_LOCK; - lock_res_and_lock(lock); - ldlm_set_skipped(lock); - unlock_res_and_lock(lock); - break; - } - - return result; -} - -/** - * Callback function for LRU-resize policy. Decides whether to keep - * \a lock in LRU for current \a LRU size \a unused, added in current - * scan \a added and number of locks to be preferably canceled \a count. - * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - unsigned long cur = cfs_time_current(); - struct ldlm_pool *pl = &ns->ns_pool; - __u64 slv, lvf, lv; - unsigned long la; - - /* Stop LRU processing when we reach past @count or have checked all - * locks in LRU. - */ - if (count && added >= count) - return LDLM_POLICY_KEEP_LOCK; - - /* - * Despite of the LV, It doesn't make sense to keep the lock which - * is unused for ns_max_age time. - */ - if (cfs_time_after(cfs_time_current(), - cfs_time_add(lock->l_last_used, ns->ns_max_age))) - return LDLM_POLICY_CANCEL_LOCK; - - slv = ldlm_pool_get_slv(pl); - lvf = ldlm_pool_get_lvf(pl); - la = cfs_duration_sec(cfs_time_sub(cur, lock->l_last_used)); - lv = lvf * la * unused; - - /* Inform pool about current CLV to see it via debugfs. */ - ldlm_pool_set_clv(pl, lv); - - /* Stop when SLV is not yet come from server or lv is smaller than - * it is. 
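-	 * For example, with lvf == 1, a lock idle for 100 s and 1000 unused
-	 * locks in the LRU, lv == 100000; the lock gets canceled once the
-	 * server-advertised SLV drops to that value or below (illustrative
-	 * numbers only).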
 - */ - if (slv == 0 || lv < slv) - return LDLM_POLICY_KEEP_LOCK; - - return LDLM_POLICY_CANCEL_LOCK; -} - -/** - * Callback function for the debugfs-based policy. Decides whether to keep - * \a lock in LRU for current \a LRU size \a unused, added in current scan \a - * added and number of locks to be preferably canceled \a count. - * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - /* Stop LRU processing when we reach past @count or have checked all - * locks in LRU. - */ - return (added >= count) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; -} - -/** - * Callback function for aged policy. Decides whether to keep \a lock in - * LRU for current LRU size \a unused, added in current scan \a added and - * number of locks to be preferably canceled \a count. - * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - if ((added >= count) && - time_before(cfs_time_current(), - cfs_time_add(lock->l_last_used, ns->ns_max_age))) - return LDLM_POLICY_KEEP_LOCK; - - return LDLM_POLICY_CANCEL_LOCK; -} - -static enum ldlm_policy_res -ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - enum ldlm_policy_res result; - - result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count); - if (result == LDLM_POLICY_KEEP_LOCK) - return result; - - return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count); -} - -/** - * Callback function for default policy. Decides whether to keep \a lock - * in LRU for current LRU size \a unused, added in current scan \a added and - * number of locks to be preferably canceled \a count. - * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res -ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, - int unused, int added, int count) -{ - /* Stop LRU processing when we reach past count or have checked all - * locks in LRU. - */ - return (added >= count) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; -} - -typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)( - struct ldlm_namespace *, - struct ldlm_lock *, int, - int, int); - -static ldlm_cancel_lru_policy_t -ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) -{ - if (flags & LDLM_LRU_FLAG_NO_WAIT) - return ldlm_cancel_no_wait_policy; - - if (ns_connect_lru_resize(ns)) { - if (flags & LDLM_LRU_FLAG_SHRINK) - /* We kill passed number of old locks.
*/ - return ldlm_cancel_passed_policy; - else if (flags & LDLM_LRU_FLAG_LRUR) - return ldlm_cancel_lrur_policy; - else if (flags & LDLM_LRU_FLAG_PASSED) - return ldlm_cancel_passed_policy; - else if (flags & LDLM_LRU_FLAG_LRUR_NO_WAIT) - return ldlm_cancel_lrur_no_wait_policy; - } else { - if (flags & LDLM_LRU_FLAG_AGED) - return ldlm_cancel_aged_policy; - } - - return ldlm_cancel_default_policy; -} - -/** - * - Free space in LRU for \a count new locks, - * redundant unused locks are canceled locally; - * - also cancel locally unused aged locks; - * - do not cancel more than \a max locks; - * - GET the found locks and add them into the \a cancels list. - * - * A client lock can be added to the l_bl_ast list only when it is - * marked LDLM_FL_CANCELING. Otherwise, somebody is already doing - * CANCEL. There are the following use cases: - * ldlm_cancel_resource_local(), ldlm_cancel_lru_local() and - * ldlm_cli_cancel(), which check and set this flag properly. As any - * attempt to cancel a lock rely on this flag, l_bl_ast list is accessed - * later without any special locking. - * - * Calling policies for enabled LRU resize: - * ---------------------------------------- - * flags & LDLM_LRU_FLAG_LRUR - use LRU resize policy (SLV from server) to - * cancel not more than \a count locks; - * - * flags & LDLM_LRU_FLAG_PASSED - cancel \a count number of old locks (located - * at the beginning of LRU list); - * - * flags & LDLM_LRU_FLAG_SHRINK - cancel not more than \a count locks according - * to memory pressure policy function; - * - * flags & LDLM_LRU_FLAG_AGED - cancel \a count locks according to - * "aged policy". - * - * flags & LDLM_LRU_FLAG_NO_WAIT - cancel as many unused locks as possible - * (typically before replaying locks) w/o - * sending any RPCs or waiting for any - * outstanding RPC to complete. - */ -static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, - struct list_head *cancels, int count, int max, - int flags) -{ - ldlm_cancel_lru_policy_t pf; - struct ldlm_lock *lock, *next; - int added = 0, unused, remained; - int no_wait = flags & - (LDLM_LRU_FLAG_NO_WAIT | LDLM_LRU_FLAG_LRUR_NO_WAIT); - - spin_lock(&ns->ns_lock); - unused = ns->ns_nr_unused; - remained = unused; - - if (!ns_connect_lru_resize(ns)) - count += unused - ns->ns_max_unused; - - pf = ldlm_cancel_lru_policy(ns, flags); - LASSERT(pf); - - while (!list_empty(&ns->ns_unused_list)) { - enum ldlm_policy_res result; - time_t last_use = 0; - - /* all unused locks */ - if (remained-- <= 0) - break; - - /* For any flags, stop scanning if @max is reached. */ - if (max && added >= max) - break; - - list_for_each_entry_safe(lock, next, &ns->ns_unused_list, - l_lru) { - /* No locks which got blocking requests. */ - LASSERT(!ldlm_is_bl_ast(lock)); - - if (no_wait && ldlm_is_skipped(lock)) - /* already processed */ - continue; - - last_use = lock->l_last_used; - if (last_use == cfs_time_current()) - continue; - - /* Somebody is already doing CANCEL. No need for this - * lock in LRU, do not traverse it again. - */ - if (!ldlm_is_canceling(lock)) - break; - - ldlm_lock_remove_from_lru_nolock(lock); - } - if (&lock->l_lru == &ns->ns_unused_list) - break; - - LDLM_LOCK_GET(lock); - spin_unlock(&ns->ns_lock); - lu_ref_add(&lock->l_reference, __func__, current); - - /* Pass the lock through the policy filter and see if it - * should stay in LRU. - * - * Even for shrinker policy we stop scanning if - * we find a lock that should stay in the cache. 
 - * We should take into account lock age anyway - * as a new lock is a valuable resource even if - * it has a low weight. - * - * That is, for shrinker policy we drop only - * old locks, but additionally choose them by - * their weight. Big extent locks will stay in - * the cache. - */ - result = pf(ns, lock, unused, added, count); - if (result == LDLM_POLICY_KEEP_LOCK) { - lu_ref_del(&lock->l_reference, - __func__, current); - LDLM_LOCK_RELEASE(lock); - spin_lock(&ns->ns_lock); - break; - } - if (result == LDLM_POLICY_SKIP_LOCK) { - lu_ref_del(&lock->l_reference, - __func__, current); - LDLM_LOCK_RELEASE(lock); - spin_lock(&ns->ns_lock); - continue; - } - - lock_res_and_lock(lock); - /* Check flags again under the lock. */ - if (ldlm_is_canceling(lock) || - (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) { - /* Another thread is removing lock from LRU, or - * somebody is already doing CANCEL, or there - * is a blocking request which will send cancel - * by itself, or the lock is no longer unused or - * the lock has been used since the pf() call and - * pages could be put under it. - */ - unlock_res_and_lock(lock); - lu_ref_del(&lock->l_reference, - __func__, current); - LDLM_LOCK_RELEASE(lock); - spin_lock(&ns->ns_lock); - continue; - } - LASSERT(!lock->l_readers && !lock->l_writers); - - /* If we have chosen to cancel this lock voluntarily, we had - * better send a cancel notification to the server, so that it - * frees appropriate state. This might lead to a race - * where while we are doing cancel here, server is also - * silently cancelling this lock. - */ - ldlm_clear_cancel_on_block(lock); - - /* Setting the CBPENDING flag is a little misleading, - * but prevents an important race; namely, once - * CBPENDING is set, the lock can accumulate no more - * readers/writers. Since readers and writers are - * already zero here, ldlm_lock_decref() won't see - * this flag and call l_blocking_ast - */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; - - /* We can't re-add to l_lru as it confuses the - * refcounting in ldlm_lock_remove_from_lru() if an AST - * arrives after we drop lr_lock below. We use l_bl_ast - * and can't use l_pending_chain as it is used on both - * server and client, even though bug 5666 says it is - * used only on the server - */ - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); - unlock_res_and_lock(lock); - lu_ref_del(&lock->l_reference, __func__, current); - spin_lock(&ns->ns_lock); - added++; - unused--; - } - spin_unlock(&ns->ns_lock); - return added; -} - -int ldlm_cancel_lru_local(struct ldlm_namespace *ns, - struct list_head *cancels, int count, int max, - enum ldlm_cancel_flags cancel_flags, int flags) -{ - int added; - - added = ldlm_prepare_lru_list(ns, cancels, count, max, flags); - if (added <= 0) - return added; - return ldlm_cli_cancel_list_local(cancels, added, cancel_flags); -} - -/** - * Cancel at least \a nr locks from given namespace LRU. - * - * When called with LCF_ASYNC the blocking callback will be handled - * in a thread and this function will return after the thread has been - * asked to call the callback. When called without LCF_ASYNC the blocking - * callback will be performed in this function. - */ -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, - enum ldlm_cancel_flags cancel_flags, - int flags) -{ - LIST_HEAD(cancels); - int count, rc; - - /* Just prepare the list of locks, do not actually cancel them yet. - * Locks are cancelled later in a separate thread.
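
For callers of ldlm_cancel_lru(), the machinery above reduces to a single call; a hypothetical caller that shrinks the LRU in the background, assuming an LRU-resize-enabled namespace and hence LDLM_LRU_FLAG_LRUR, as in ldlm_cli_cancel() earlier:

static void shrink_ns_lru_async(struct ldlm_namespace *ns, int nr)
{
	/* Ask for at least @nr unused locks to be cancelled; the
	 * blocking thread sends the actual LDLM_CANCEL RPCs later. */
	ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_LRU_FLAG_LRUR);
}
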
- */ - count = ldlm_prepare_lru_list(ns, &cancels, nr, 0, flags); - rc = ldlm_bl_to_thread_list(ns, NULL, &cancels, count, cancel_flags); - if (rc == 0) - return count; - - return 0; -} - -/** - * Find and cancel locally unused locks found on resource, matched to the - * given policy, mode. GET the found locks and add them into the \a cancels - * list. - */ -int ldlm_cancel_resource_local(struct ldlm_resource *res, - struct list_head *cancels, - union ldlm_policy_data *policy, - enum ldlm_mode mode, __u64 lock_flags, - enum ldlm_cancel_flags cancel_flags, - void *opaque) -{ - struct ldlm_lock *lock; - int count = 0; - - lock_res(res); - list_for_each_entry(lock, &res->lr_granted, l_res_link) { - if (opaque && lock->l_ast_data != opaque) { - LDLM_ERROR(lock, "data %p doesn't match opaque %p", - lock->l_ast_data, opaque); - continue; - } - - if (lock->l_readers || lock->l_writers) - continue; - - /* If somebody is already doing CANCEL, or blocking AST came, - * skip this lock. - */ - if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock)) - continue; - - if (lockmode_compat(lock->l_granted_mode, mode)) - continue; - - /* If policy is given and this is IBITS lock, add to list only - * those locks that match by policy. - */ - if (policy && (lock->l_resource->lr_type == LDLM_IBITS) && - !(lock->l_policy_data.l_inodebits.bits & - policy->l_inodebits.bits)) - continue; - - /* See CBPENDING comment in ldlm_cancel_lru */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING | - lock_flags; - - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); - LDLM_LOCK_GET(lock); - count++; - } - unlock_res(res); - - return ldlm_cli_cancel_list_local(cancels, count, cancel_flags); -} -EXPORT_SYMBOL(ldlm_cancel_resource_local); - -/** - * Cancel client-side locks from a list and send/prepare cancel RPCs to the - * server. - * If \a req is NULL, send CANCEL request to server with handles of locks - * in the \a cancels. If EARLY_CANCEL is not supported, send CANCEL requests - * separately per lock. - * If \a req is not NULL, put handles of locks in \a cancels into the request - * buffer at the offset \a off. - * Destroy \a cancels at the end. - */ -int ldlm_cli_cancel_list(struct list_head *cancels, int count, - struct ptlrpc_request *req, - enum ldlm_cancel_flags flags) -{ - struct ldlm_lock *lock; - int res = 0; - - if (list_empty(cancels) || count == 0) - return 0; - - /* XXX: requests (both batched and not) could be sent in parallel. - * Usually it is enough to have just 1 RPC, but it is possible that - * there are too many locks to be cancelled in LRU or on a resource. - * It would also speed up the case when the server does not support - * the feature. - */ - while (count > 0) { - LASSERT(!list_empty(cancels)); - lock = list_first_entry(cancels, struct ldlm_lock, l_bl_ast); - LASSERT(lock->l_conn_export); - - if (exp_connect_cancelset(lock->l_conn_export)) { - res = count; - if (req) - ldlm_cancel_pack(req, cancels, count); - else - res = ldlm_cli_cancel_req(lock->l_conn_export, - cancels, count, - flags); - } else { - res = ldlm_cli_cancel_req(lock->l_conn_export, - cancels, 1, flags); - } - - if (res < 0) { - CDEBUG_LIMIT(res == -ESHUTDOWN ? D_DLMTRACE : D_ERROR, - "%s: %d\n", __func__, res); - res = count; - } - - count -= res; - ldlm_lock_list_put(cancels, l_bl_ast, res); - } - LASSERT(count == 0); - return 0; -} -EXPORT_SYMBOL(ldlm_cli_cancel_list); - -/** - * Cancel all locks on a resource that have 0 readers/writers. 
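
ldlm_cancel_resource_local() above is typically driven from enqueue paths that want conflicting unused locks batched into the next RPC. A hypothetical caller, assuming an exclusive (LCK_EX) request and no inodebits policy:

static int collect_conflicting_unused(struct ldlm_resource *res,
				      struct list_head *cancels)
{
	/* Gathers unused locks incompatible with LCK_EX, cancels them
	 * locally, and returns how many still need a server CANCEL. */
	return ldlm_cancel_resource_local(res, cancels, NULL, LCK_EX,
					  0, LCF_BL_AST, NULL);
}
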
- * - * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying - * to notify the server. - */ -int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque) -{ - struct ldlm_resource *res; - LIST_HEAD(cancels); - int count; - int rc; - - res = ldlm_resource_get(ns, NULL, res_id, 0, 0); - if (IS_ERR(res)) { - /* This is not a problem. */ - CDEBUG(D_INFO, "No resource %llu\n", res_id->name[0]); - return 0; - } - - LDLM_RESOURCE_ADDREF(res); - count = ldlm_cancel_resource_local(res, &cancels, policy, mode, - 0, flags | LCF_BL_AST, opaque); - rc = ldlm_cli_cancel_list(&cancels, count, NULL, flags); - if (rc != ELDLM_OK) - CERROR("canceling unused lock " DLDLMRES ": rc = %d\n", - PLDLMRES(res), rc); - - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - return 0; -} -EXPORT_SYMBOL(ldlm_cli_cancel_unused_resource); - -struct ldlm_cli_cancel_arg { - int lc_flags; - void *lc_opaque; -}; - -static int ldlm_cli_hash_cancel_unused(struct cfs_hash *hs, - struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - struct ldlm_cli_cancel_arg *lc = arg; - - ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name, - NULL, LCK_MINMODE, - lc->lc_flags, lc->lc_opaque); - /* must return 0 for hash iteration */ - return 0; -} - -/** - * Cancel all locks on a namespace (or a specific resource, if given) - * that have 0 readers/writers. - * - * If flags & LCF_LOCAL, throw the locks away without trying - * to notify the server. - */ -int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - enum ldlm_cancel_flags flags, void *opaque) -{ - struct ldlm_cli_cancel_arg arg = { - .lc_flags = flags, - .lc_opaque = opaque, - }; - - if (!ns) - return ELDLM_OK; - - if (res_id) { - return ldlm_cli_cancel_unused_resource(ns, res_id, NULL, - LCK_MINMODE, flags, - opaque); - } else { - cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_cli_hash_cancel_unused, &arg, 0); - return ELDLM_OK; - } -} -EXPORT_SYMBOL(ldlm_cli_cancel_unused); - -/* Lock iterators. 
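
For eviction-style teardown, the whole-namespace form above is the usual entry point; a hypothetical helper that throws away every unused lock locally, without telling the server:

static void drop_all_unused(struct ldlm_namespace *ns)
{
	/* LCF_LOCAL: no CANCEL RPCs are sent. */
	ldlm_cli_cancel_unused(ns, NULL, LCF_LOCAL, NULL);
}
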
*/ - -static int ldlm_resource_foreach(struct ldlm_resource *res, - ldlm_iterator_t iter, void *closure) -{ - struct ldlm_lock *tmp; - struct ldlm_lock *lock; - int rc = LDLM_ITER_CONTINUE; - - if (!res) - return LDLM_ITER_CONTINUE; - - lock_res(res); - list_for_each_entry_safe(lock, tmp, &res->lr_granted, l_res_link) { - if (iter(lock, closure) == LDLM_ITER_STOP) { - rc = LDLM_ITER_STOP; - goto out; - } - } - - list_for_each_entry_safe(lock, tmp, &res->lr_waiting, l_res_link) { - if (iter(lock, closure) == LDLM_ITER_STOP) { - rc = LDLM_ITER_STOP; - goto out; - } - } - out: - unlock_res(res); - return rc; -} - -struct iter_helper_data { - ldlm_iterator_t iter; - void *closure; -}; - -static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure) -{ - struct iter_helper_data *helper = closure; - - return helper->iter(lock, helper->closure); -} - -static int ldlm_res_iter_helper(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) - -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - - return ldlm_resource_foreach(res, ldlm_iter_helper, arg) == - LDLM_ITER_STOP; -} - -static void ldlm_namespace_foreach(struct ldlm_namespace *ns, - ldlm_iterator_t iter, void *closure) - -{ - struct iter_helper_data helper = { - .iter = iter, - .closure = closure, - }; - - cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_res_iter_helper, &helper, 0); -} - -/* non-blocking function to manipulate a lock whose cb_data is being put away. - * return 0: find no resource - * > 0: must be LDLM_ITER_STOP/LDLM_ITER_CONTINUE. - * < 0: errors - */ -int ldlm_resource_iterate(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - ldlm_iterator_t iter, void *data) -{ - struct ldlm_resource *res; - int rc; - - LASSERTF(ns, "must pass in namespace\n"); - - res = ldlm_resource_get(ns, NULL, res_id, 0, 0); - if (IS_ERR(res)) - return 0; - - LDLM_RESOURCE_ADDREF(res); - rc = ldlm_resource_foreach(res, iter, data); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - return rc; -} -EXPORT_SYMBOL(ldlm_resource_iterate); - -/* Lock replay */ - -static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) -{ - struct list_head *list = closure; - - /* we use l_pending_chain here, because it's unused on clients. */ - LASSERTF(list_empty(&lock->l_pending_chain), - "lock %p next %p prev %p\n", - lock, &lock->l_pending_chain.next, - &lock->l_pending_chain.prev); - /* bug 9573: don't replay locks left after eviction, or - * bug 17614: locks being actively cancelled. Get a reference - * on a lock so that it does not disappear under us (e.g. 
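
The iterator plumbing above bottoms out in ldlm_resource_iterate(); a hypothetical callback that counts the locks on one resource shows the calling convention (return LDLM_ITER_CONTINUE to keep walking, LDLM_ITER_STOP to abort):

static int count_one_lock(struct ldlm_lock *lock, void *closure)
{
	(*(int *)closure)++;
	return LDLM_ITER_CONTINUE;
}

static int count_resource_locks(struct ldlm_namespace *ns,
				const struct ldlm_res_id *res_id)
{
	int count = 0;

	/* Harmless if the resource does not exist: iterate returns 0. */
	ldlm_resource_iterate(ns, res_id, count_one_lock, &count);
	return count;
}
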
due to cancel) - */ - if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_BL_DONE))) { - list_add(&lock->l_pending_chain, list); - LDLM_LOCK_GET(lock); - } - - return LDLM_ITER_CONTINUE; -} - -static int replay_lock_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - struct ldlm_async_args *aa, int rc) -{ - struct ldlm_lock *lock; - struct ldlm_reply *reply; - struct obd_export *exp; - - atomic_dec(&req->rq_import->imp_replay_inflight); - if (rc != ELDLM_OK) - goto out; - - reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - if (!reply) { - rc = -EPROTO; - goto out; - } - - lock = ldlm_handle2lock(&aa->lock_handle); - if (!lock) { - CERROR("received replay ack for unknown local cookie %#llx remote cookie %#llx from server %s id %s\n", - aa->lock_handle.cookie, reply->lock_handle.cookie, - req->rq_export->exp_client_uuid.uuid, - libcfs_id2str(req->rq_peer)); - rc = -ESTALE; - goto out; - } - - /* Key change rehash lock in per-export hash with new key */ - exp = req->rq_export; - if (exp && exp->exp_lock_hash) { - /* In the function below, .hs_keycmp resolves to - * ldlm_export_lock_keycmp() - */ - /* coverity[overrun-buffer-val] */ - cfs_hash_rehash_key(exp->exp_lock_hash, - &lock->l_remote_handle, - &reply->lock_handle, - &lock->l_exp_hash); - } else { - lock->l_remote_handle = reply->lock_handle; - } - - LDLM_DEBUG(lock, "replayed lock:"); - ptlrpc_import_recovery_state_machine(req->rq_import); - LDLM_LOCK_PUT(lock); -out: - if (rc != ELDLM_OK) - ptlrpc_connect_import(req->rq_import); - - return rc; -} - -static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) -{ - struct ptlrpc_request *req; - struct ldlm_async_args *aa; - struct ldlm_request *body; - int flags; - - /* Bug 11974: Do not replay a lock which is actively being canceled */ - if (ldlm_is_bl_done(lock)) { - LDLM_DEBUG(lock, "Not replaying canceled lock:"); - return 0; - } - - /* If this is reply-less callback lock, we cannot replay it, since - * server might have long dropped it, but notification of that event was - * lost by network. (and server granted conflicting lock already) - */ - if (ldlm_is_cancel_on_block(lock)) { - LDLM_DEBUG(lock, "Not replaying reply-less lock:"); - ldlm_lock_cancel(lock); - return 0; - } - - /* - * If granted mode matches the requested mode, this lock is granted. - * - * If they differ, but we have a granted mode, then we were granted - * one mode and now want another: ergo, converting. - * - * If we haven't been granted anything and are on a resource list, - * then we're blocked/waiting. - * - * If we haven't been granted anything and we're NOT on a resource list, - * then we haven't got a reply yet and don't have a known disposition. - * This happens whenever a lock enqueue is the request that triggers - * recovery. - */ - if (lock->l_granted_mode == lock->l_req_mode) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED; - else if (lock->l_granted_mode) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV; - else if (!list_empty(&lock->l_res_link)) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT; - else - flags = LDLM_FL_REPLAY; - - req = ptlrpc_request_alloc_pack(imp, &RQF_LDLM_ENQUEUE, - LUSTRE_DLM_VERSION, LDLM_ENQUEUE); - if (!req) - return -ENOMEM; - - /* We're part of recovery, so don't wait for it. 
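
The four-way disposition described above reduces to a small mapping; restated as a hypothetical helper, using the same flag combinations that appear verbatim in replay_one_lock():

static __u64 replay_flags(const struct ldlm_lock *lock)
{
	if (lock->l_granted_mode == lock->l_req_mode)	/* granted */
		return LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED;
	if (lock->l_granted_mode)			/* converting */
		return LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV;
	if (!list_empty(&lock->l_res_link))		/* blocked/waiting */
		return LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
	return LDLM_FL_REPLAY;				/* no reply seen yet */
}
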
*/ - req->rq_send_state = LUSTRE_IMP_REPLAY_LOCKS; - - body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - ldlm_lock2desc(lock, &body->lock_desc); - body->lock_flags = ldlm_flags_to_wire(flags); - - ldlm_lock2handle(lock, &body->lock_handle[0]); - if (lock->l_lvb_len > 0) - req_capsule_extend(&req->rq_pill, &RQF_LDLM_ENQUEUE_LVB); - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, - lock->l_lvb_len); - ptlrpc_request_set_replen(req); - /* notify the server we've replayed all requests. - * Also, we mark the request to be put on a dedicated - * queue to be processed after all request replays. - * bug 6063 - */ - lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE); - - LDLM_DEBUG(lock, "replaying lock:"); - - atomic_inc(&req->rq_import->imp_replay_inflight); - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - aa->lock_handle = body->lock_handle[0]; - req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret; - ptlrpcd_add_req(req); - - return 0; -} - -/** - * Cancel as many unused locks as possible before replay. Since we are - * in recovery, we cannot wait for any outstanding RPCs or send any RPCs - * to the server. - * - * Called only in recovery before replaying locks. There is no need to - * replay locks that are unused. Since the clients may hold thousands of - * cached unused locks, dropping the unused locks can greatly reduce the - * load on the servers at recovery time. - */ -static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns) -{ - int canceled; - LIST_HEAD(cancels); - - CDEBUG(D_DLMTRACE, - "Dropping as many unused locks as possible before replay for namespace %s (%d)\n", - ldlm_ns_name(ns), ns->ns_nr_unused); - - /* We don't need to care whether or not LRU resize is enabled - * because the LDLM_LRU_FLAG_NO_WAIT policy doesn't use the - * count parameter - */ - canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0, - LCF_LOCAL, LDLM_LRU_FLAG_NO_WAIT); - - CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n", - canceled, ldlm_ns_name(ns)); -} - -int ldlm_replay_locks(struct obd_import *imp) -{ - struct ldlm_namespace *ns = imp->imp_obd->obd_namespace; - LIST_HEAD(list); - struct ldlm_lock *lock, *next; - int rc = 0; - - LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); - - /* don't replay locks if import failed recovery */ - if (imp->imp_vbr_failed) - return 0; - - /* ensure this doesn't fall to 0 before all have been queued */ - atomic_inc(&imp->imp_replay_inflight); - - if (ldlm_cancel_unused_locks_before_replay) - ldlm_cancel_unused_locks_for_replay(ns); - - ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list); - - list_for_each_entry_safe(lock, next, &list, l_pending_chain) { - list_del_init(&lock->l_pending_chain); - if (rc) { - LDLM_LOCK_RELEASE(lock); - continue; /* or try to do the rest? */ - } - rc = replay_one_lock(imp, lock); - LDLM_LOCK_RELEASE(lock); - } - - atomic_dec(&imp->imp_replay_inflight); - - return rc; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c deleted file mode 100644 index 4c44603ab6f9..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c +++ /dev/null @@ -1,1369 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_resource.c - * - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Peter Braam <braam@clusterfs.com> - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#include <lustre_dlm.h> -#include <lustre_fid.h> -#include <obd_class.h> -#include "ldlm_internal.h" - -struct kmem_cache *ldlm_resource_slab, *ldlm_lock_slab; - -int ldlm_srv_namespace_nr; -int ldlm_cli_namespace_nr; - -struct mutex ldlm_srv_namespace_lock; -LIST_HEAD(ldlm_srv_namespace_list); - -struct mutex ldlm_cli_namespace_lock; -/* Client Namespaces that have active resources in them. - * Once all resources go away, ldlm_poold moves such namespaces to the - * inactive list - */ -LIST_HEAD(ldlm_cli_active_namespace_list); -/* Client namespaces that don't have any locks in them */ -static LIST_HEAD(ldlm_cli_inactive_namespace_list); - -static struct dentry *ldlm_debugfs_dir; -static struct dentry *ldlm_ns_debugfs_dir; -struct dentry *ldlm_svc_debugfs_dir; - -/* during debug dump certain amount of granted locks for one resource to avoid - * DDOS. - */ -static unsigned int ldlm_dump_granted_max = 256; - -static ssize_t -lprocfs_wr_dump_ns(struct file *file, const char __user *buffer, - size_t count, loff_t *off) -{ - ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE); - ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE); - return count; -} - -LPROC_SEQ_FOPS_WR_ONLY(ldlm, dump_ns); - -static int ldlm_rw_uint_seq_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%u\n", *(unsigned int *)m->private); - return 0; -} - -static ssize_t -ldlm_rw_uint_seq_write(struct file *file, const char __user *buffer, - size_t count, loff_t *off) -{ - struct seq_file *seq = file->private_data; - - if (count == 0) - return 0; - return kstrtouint_from_user(buffer, count, 0, - (unsigned int *)seq->private); -} - -LPROC_SEQ_FOPS(ldlm_rw_uint); - -static struct lprocfs_vars ldlm_debugfs_list[] = { - { "dump_namespaces", &ldlm_dump_ns_fops, NULL, 0222 }, - { "dump_granted_max", &ldlm_rw_uint_fops, &ldlm_dump_granted_max }, - { NULL } -}; - -int ldlm_debugfs_setup(void) -{ - int rc; - - ldlm_debugfs_dir = ldebugfs_register(OBD_LDLM_DEVICENAME, - debugfs_lustre_root, - NULL, NULL); - if (IS_ERR_OR_NULL(ldlm_debugfs_dir)) { - CERROR("LProcFS failed in ldlm-init\n"); - rc = ldlm_debugfs_dir ? 
PTR_ERR(ldlm_debugfs_dir) : -ENOMEM; - goto err; - } - - ldlm_ns_debugfs_dir = ldebugfs_register("namespaces", - ldlm_debugfs_dir, - NULL, NULL); - if (IS_ERR_OR_NULL(ldlm_ns_debugfs_dir)) { - CERROR("LProcFS failed in ldlm-init\n"); - rc = ldlm_ns_debugfs_dir ? PTR_ERR(ldlm_ns_debugfs_dir) - : -ENOMEM; - goto err_type; - } - - ldlm_svc_debugfs_dir = ldebugfs_register("services", - ldlm_debugfs_dir, - NULL, NULL); - if (IS_ERR_OR_NULL(ldlm_svc_debugfs_dir)) { - CERROR("LProcFS failed in ldlm-init\n"); - rc = ldlm_svc_debugfs_dir ? PTR_ERR(ldlm_svc_debugfs_dir) - : -ENOMEM; - goto err_ns; - } - - rc = ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL); - if (rc) { - CERROR("LProcFS failed in ldlm-init\n"); - goto err_svc; - } - - return 0; - -err_svc: - ldebugfs_remove(&ldlm_svc_debugfs_dir); -err_ns: - ldebugfs_remove(&ldlm_ns_debugfs_dir); -err_type: - ldebugfs_remove(&ldlm_debugfs_dir); -err: - ldlm_svc_debugfs_dir = NULL; - ldlm_ns_debugfs_dir = NULL; - ldlm_debugfs_dir = NULL; - return rc; -} - -void ldlm_debugfs_cleanup(void) -{ - if (!IS_ERR_OR_NULL(ldlm_svc_debugfs_dir)) - ldebugfs_remove(&ldlm_svc_debugfs_dir); - - if (!IS_ERR_OR_NULL(ldlm_ns_debugfs_dir)) - ldebugfs_remove(&ldlm_ns_debugfs_dir); - - if (!IS_ERR_OR_NULL(ldlm_debugfs_dir)) - ldebugfs_remove(&ldlm_debugfs_dir); - - ldlm_svc_debugfs_dir = NULL; - ldlm_ns_debugfs_dir = NULL; - ldlm_debugfs_dir = NULL; -} - -static ssize_t resource_count_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - __u64 res = 0; - struct cfs_hash_bd bd; - int i; - - /* result is not strictly consistent */ - cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, i) - res += cfs_hash_bd_count_get(&bd); - return sprintf(buf, "%lld\n", res); -} -LUSTRE_RO_ATTR(resource_count); - -static ssize_t lock_count_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - __u64 locks; - - locks = lprocfs_stats_collector(ns->ns_stats, LDLM_NSS_LOCKS, - LPROCFS_FIELDS_FLAGS_SUM); - return sprintf(buf, "%lld\n", locks); -} -LUSTRE_RO_ATTR(lock_count); - -static ssize_t lock_unused_count_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - - return sprintf(buf, "%d\n", ns->ns_nr_unused); -} -LUSTRE_RO_ATTR(lock_unused_count); - -static ssize_t lru_size_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - __u32 *nr = &ns->ns_max_unused; - - if (ns_connect_lru_resize(ns)) - nr = &ns->ns_nr_unused; - return sprintf(buf, "%u\n", *nr); -} - -static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - unsigned long tmp; - int lru_resize; - int err; - - if (strncmp(buffer, "clear", 5) == 0) { - CDEBUG(D_DLMTRACE, - "dropping all unused locks from namespace %s\n", - ldlm_ns_name(ns)); - if (ns_connect_lru_resize(ns)) { - int canceled, unused = ns->ns_nr_unused; - - /* Try to cancel all @ns_nr_unused locks. 
*/ - canceled = ldlm_cancel_lru(ns, unused, 0, - LDLM_LRU_FLAG_PASSED); - if (canceled < unused) { - CDEBUG(D_DLMTRACE, - "not all requested locks are canceled, requested: %d, canceled: %d\n", - unused, - canceled); - return -EINVAL; - } - } else { - tmp = ns->ns_max_unused; - ns->ns_max_unused = 0; - ldlm_cancel_lru(ns, 0, 0, LDLM_LRU_FLAG_PASSED); - ns->ns_max_unused = tmp; - } - return count; - } - - err = kstrtoul(buffer, 10, &tmp); - if (err != 0) { - CERROR("lru_size: invalid value written\n"); - return -EINVAL; - } - lru_resize = (tmp == 0); - - if (ns_connect_lru_resize(ns)) { - if (!lru_resize) - ns->ns_max_unused = (unsigned int)tmp; - - if (tmp > ns->ns_nr_unused) - tmp = ns->ns_nr_unused; - tmp = ns->ns_nr_unused - tmp; - - CDEBUG(D_DLMTRACE, - "changing namespace %s unused locks from %u to %u\n", - ldlm_ns_name(ns), ns->ns_nr_unused, - (unsigned int)tmp); - ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_LRU_FLAG_PASSED); - - if (!lru_resize) { - CDEBUG(D_DLMTRACE, - "disable lru_resize for namespace %s\n", - ldlm_ns_name(ns)); - ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE; - } - } else { - CDEBUG(D_DLMTRACE, - "changing namespace %s max_unused from %u to %u\n", - ldlm_ns_name(ns), ns->ns_max_unused, - (unsigned int)tmp); - ns->ns_max_unused = (unsigned int)tmp; - ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_LRU_FLAG_PASSED); - - /* Make sure that LRU resize was originally supported before - * turning it on here. - */ - if (lru_resize && - (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) { - CDEBUG(D_DLMTRACE, - "enable lru_resize for namespace %s\n", - ldlm_ns_name(ns)); - ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE; - } - } - - return count; -} -LUSTRE_RW_ATTR(lru_size); - -static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - - return sprintf(buf, "%u\n", ns->ns_max_age); -} - -static ssize_t lru_max_age_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - unsigned long tmp; - int err; - - err = kstrtoul(buffer, 10, &tmp); - if (err != 0) - return -EINVAL; - - ns->ns_max_age = tmp; - - return count; -} -LUSTRE_RW_ATTR(lru_max_age); - -static ssize_t early_lock_cancel_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - - return sprintf(buf, "%d\n", ns_connect_cancelset(ns)); -} - -static ssize_t early_lock_cancel_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - unsigned long supp = -1; - int rc; - - rc = kstrtoul(buffer, 10, &supp); - if (rc < 0) - return rc; - - if (supp == 0) - ns->ns_connect_flags &= ~OBD_CONNECT_CANCELSET; - else if (ns->ns_orig_connect_flags & OBD_CONNECT_CANCELSET) - ns->ns_connect_flags |= OBD_CONNECT_CANCELSET; - return count; -} -LUSTRE_RW_ATTR(early_lock_cancel); - -/* These are for namespaces in /sys/fs/lustre/ldlm/namespaces/ */ -static struct attribute *ldlm_ns_attrs[] = { - &lustre_attr_resource_count.attr, - &lustre_attr_lock_count.attr, - &lustre_attr_lock_unused_count.attr, - &lustre_attr_lru_size.attr, - &lustre_attr_lru_max_age.attr, - &lustre_attr_early_lock_cancel.attr, - NULL, -}; - -static void ldlm_ns_release(struct kobject *kobj) -{ - struct ldlm_namespace 
*ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - complete(&ns->ns_kobj_unregister); -} - -static struct kobj_type ldlm_ns_ktype = { - .default_attrs = ldlm_ns_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = ldlm_ns_release, -}; - -static void ldlm_namespace_debugfs_unregister(struct ldlm_namespace *ns) -{ - if (IS_ERR_OR_NULL(ns->ns_debugfs_entry)) - CERROR("dlm namespace %s has no procfs dir?\n", - ldlm_ns_name(ns)); - else - ldebugfs_remove(&ns->ns_debugfs_entry); - - if (ns->ns_stats) - lprocfs_free_stats(&ns->ns_stats); -} - -static void ldlm_namespace_sysfs_unregister(struct ldlm_namespace *ns) -{ - kobject_put(&ns->ns_kobj); - wait_for_completion(&ns->ns_kobj_unregister); -} - -static int ldlm_namespace_sysfs_register(struct ldlm_namespace *ns) -{ - int err; - - ns->ns_kobj.kset = ldlm_ns_kset; - init_completion(&ns->ns_kobj_unregister); - err = kobject_init_and_add(&ns->ns_kobj, &ldlm_ns_ktype, NULL, - "%s", ldlm_ns_name(ns)); - - ns->ns_stats = lprocfs_alloc_stats(LDLM_NSS_LAST, 0); - if (!ns->ns_stats) { - kobject_put(&ns->ns_kobj); - return -ENOMEM; - } - - lprocfs_counter_init(ns->ns_stats, LDLM_NSS_LOCKS, - LPROCFS_CNTR_AVGMINMAX, "locks", "locks"); - - return err; -} - -static int ldlm_namespace_debugfs_register(struct ldlm_namespace *ns) -{ - struct dentry *ns_entry; - - if (!IS_ERR_OR_NULL(ns->ns_debugfs_entry)) { - ns_entry = ns->ns_debugfs_entry; - } else { - ns_entry = debugfs_create_dir(ldlm_ns_name(ns), - ldlm_ns_debugfs_dir); - if (!ns_entry) - return -ENOMEM; - ns->ns_debugfs_entry = ns_entry; - } - - return 0; -} - -#undef MAX_STRING_SIZE - -static struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res) -{ - LASSERT(res); - LASSERT(res != LP_POISON); - atomic_inc(&res->lr_refcount); - CDEBUG(D_INFO, "getref res: %p count: %d\n", res, - atomic_read(&res->lr_refcount)); - return res; -} - -static unsigned int ldlm_res_hop_hash(struct cfs_hash *hs, - const void *key, unsigned int mask) -{ - const struct ldlm_res_id *id = key; - unsigned int val = 0; - unsigned int i; - - for (i = 0; i < RES_NAME_SIZE; i++) - val += id->name[i]; - return val & mask; -} - -static unsigned int ldlm_res_hop_fid_hash(struct cfs_hash *hs, - const void *key, unsigned int mask) -{ - const struct ldlm_res_id *id = key; - struct lu_fid fid; - __u32 hash; - __u32 val; - - fid.f_seq = id->name[LUSTRE_RES_ID_SEQ_OFF]; - fid.f_oid = (__u32)id->name[LUSTRE_RES_ID_VER_OID_OFF]; - fid.f_ver = (__u32)(id->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32); - - hash = fid_flatten32(&fid); - hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */ - if (id->name[LUSTRE_RES_ID_HSH_OFF] != 0) { - val = id->name[LUSTRE_RES_ID_HSH_OFF]; - hash += (val >> 5) + (val << 11); - } else { - val = fid_oid(&fid); - } - hash = hash_long(hash, hs->hs_bkt_bits); - /* give me another random factor */ - hash -= hash_long((unsigned long)hs, val % 11 + 3); - - hash <<= hs->hs_cur_bits - hs->hs_bkt_bits; - hash |= ldlm_res_hop_hash(hs, key, CFS_HASH_NBKT(hs) - 1); - - return hash & mask; -} - -static void *ldlm_res_hop_key(struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - return &res->lr_name; -} - -static int ldlm_res_hop_keycmp(const void *key, struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - return ldlm_res_eq((const struct ldlm_res_id *)key, - (const struct ldlm_res_id *)&res->lr_name); -} - -static void *ldlm_res_hop_object(struct hlist_node *hnode) -{ - 
return hlist_entry(hnode, struct ldlm_resource, lr_hash); -} - -static void ldlm_res_hop_get_locked(struct cfs_hash *hs, - struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - ldlm_resource_getref(res); -} - -static void ldlm_res_hop_put(struct cfs_hash *hs, struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - ldlm_resource_putref(res); -} - -static struct cfs_hash_ops ldlm_ns_hash_ops = { - .hs_hash = ldlm_res_hop_hash, - .hs_key = ldlm_res_hop_key, - .hs_keycmp = ldlm_res_hop_keycmp, - .hs_keycpy = NULL, - .hs_object = ldlm_res_hop_object, - .hs_get = ldlm_res_hop_get_locked, - .hs_put = ldlm_res_hop_put -}; - -static struct cfs_hash_ops ldlm_ns_fid_hash_ops = { - .hs_hash = ldlm_res_hop_fid_hash, - .hs_key = ldlm_res_hop_key, - .hs_keycmp = ldlm_res_hop_keycmp, - .hs_keycpy = NULL, - .hs_object = ldlm_res_hop_object, - .hs_get = ldlm_res_hop_get_locked, - .hs_put = ldlm_res_hop_put -}; - -struct ldlm_ns_hash_def { - enum ldlm_ns_type nsd_type; - /** hash bucket bits */ - unsigned int nsd_bkt_bits; - /** hash bits */ - unsigned int nsd_all_bits; - /** hash operations */ - struct cfs_hash_ops *nsd_hops; -}; - -static struct ldlm_ns_hash_def ldlm_ns_hash_defs[] = { - { - .nsd_type = LDLM_NS_TYPE_MDC, - .nsd_bkt_bits = 11, - .nsd_all_bits = 16, - .nsd_hops = &ldlm_ns_fid_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_MDT, - .nsd_bkt_bits = 14, - .nsd_all_bits = 21, - .nsd_hops = &ldlm_ns_fid_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_OSC, - .nsd_bkt_bits = 8, - .nsd_all_bits = 12, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_OST, - .nsd_bkt_bits = 11, - .nsd_all_bits = 17, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_MGC, - .nsd_bkt_bits = 4, - .nsd_all_bits = 4, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_MGT, - .nsd_bkt_bits = 4, - .nsd_all_bits = 4, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_UNKNOWN, - }, -}; - -/** Register \a ns in the list of namespaces */ -static void ldlm_namespace_register(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - mutex_lock(ldlm_namespace_lock(client)); - LASSERT(list_empty(&ns->ns_list_chain)); - list_add(&ns->ns_list_chain, &ldlm_cli_inactive_namespace_list); - ldlm_namespace_nr_inc(client); - mutex_unlock(ldlm_namespace_lock(client)); -} - -/** - * Create and initialize new empty namespace. 
- */ -struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name, - enum ldlm_side client, - enum ldlm_appetite apt, - enum ldlm_ns_type ns_type) -{ - struct ldlm_namespace *ns = NULL; - struct ldlm_ns_bucket *nsb; - struct ldlm_ns_hash_def *nsd; - struct cfs_hash_bd bd; - int idx; - int rc; - - LASSERT(obd); - - rc = ldlm_get_ref(); - if (rc) { - CERROR("ldlm_get_ref failed: %d\n", rc); - return NULL; - } - - for (idx = 0;; idx++) { - nsd = &ldlm_ns_hash_defs[idx]; - if (nsd->nsd_type == LDLM_NS_TYPE_UNKNOWN) { - CERROR("Unknown type %d for ns %s\n", ns_type, name); - goto out_ref; - } - - if (nsd->nsd_type == ns_type) - break; - } - - ns = kzalloc(sizeof(*ns), GFP_NOFS); - if (!ns) - goto out_ref; - - ns->ns_rs_hash = cfs_hash_create(name, - nsd->nsd_all_bits, nsd->nsd_all_bits, - nsd->nsd_bkt_bits, sizeof(*nsb), - CFS_HASH_MIN_THETA, - CFS_HASH_MAX_THETA, - nsd->nsd_hops, - CFS_HASH_DEPTH | - CFS_HASH_BIGNAME | - CFS_HASH_SPIN_BKTLOCK | - CFS_HASH_NO_ITEMREF); - if (!ns->ns_rs_hash) - goto out_ns; - - cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, idx) { - nsb = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd); - at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0); - nsb->nsb_namespace = ns; - } - - ns->ns_obd = obd; - ns->ns_appetite = apt; - ns->ns_client = client; - - INIT_LIST_HEAD(&ns->ns_list_chain); - INIT_LIST_HEAD(&ns->ns_unused_list); - spin_lock_init(&ns->ns_lock); - atomic_set(&ns->ns_bref, 0); - init_waitqueue_head(&ns->ns_waitq); - - ns->ns_max_parallel_ast = LDLM_DEFAULT_PARALLEL_AST_LIMIT; - ns->ns_nr_unused = 0; - ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE; - ns->ns_max_age = LDLM_DEFAULT_MAX_ALIVE; - ns->ns_orig_connect_flags = 0; - ns->ns_connect_flags = 0; - ns->ns_stopping = 0; - - rc = ldlm_namespace_sysfs_register(ns); - if (rc != 0) { - CERROR("Can't initialize ns sysfs, rc %d\n", rc); - goto out_hash; - } - - rc = ldlm_namespace_debugfs_register(ns); - if (rc != 0) { - CERROR("Can't initialize ns proc, rc %d\n", rc); - goto out_sysfs; - } - - idx = ldlm_namespace_nr_read(client); - rc = ldlm_pool_init(&ns->ns_pool, ns, idx, client); - if (rc) { - CERROR("Can't initialize lock pool, rc %d\n", rc); - goto out_proc; - } - - ldlm_namespace_register(ns, client); - return ns; -out_proc: - ldlm_namespace_debugfs_unregister(ns); -out_sysfs: - ldlm_namespace_sysfs_unregister(ns); - ldlm_namespace_cleanup(ns, 0); -out_hash: - cfs_hash_putref(ns->ns_rs_hash); -out_ns: - kfree(ns); -out_ref: - ldlm_put_ref(); - return NULL; -} -EXPORT_SYMBOL(ldlm_namespace_new); - -extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); - -/** - * Cancel and destroy all locks on a resource. - * - * If flags contains FL_LOCAL_ONLY, don't try to tell the server, just - * clean up. This is currently only used for recovery, and we make - * certain assumptions as a result--notably, that we shouldn't cancel - * locks with refs. - */ -static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, - __u64 flags) -{ - int rc = 0; - bool local_only = !!(flags & LDLM_FL_LOCAL_ONLY); - - do { - struct ldlm_lock *lock = NULL, *tmp; - struct lustre_handle lockh; - - /* First, we look for non-cleaned-yet lock - * all cleaned locks are marked by CLEANED flag. - */ - lock_res(res); - list_for_each_entry(tmp, q, l_res_link) { - if (ldlm_is_cleaned(tmp)) - continue; - - lock = tmp; - LDLM_LOCK_GET(lock); - ldlm_set_cleaned(lock); - break; - } - - if (!lock) { - unlock_res(res); - break; - } - - /* Set CBPENDING so nothing in the cancellation path - * can match this lock. 
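
A hypothetical creation call for ldlm_namespace_new() above, assuming a client-side MGC device and the modest appetite constant (LDLM_NAMESPACE_MODEST, not shown in this file) from the namespace API:

static struct ldlm_namespace *setup_mgc_namespace(struct obd_device *obd)
{
	/* Client namespace using the 4-bit MGC hash geometry above. */
	return ldlm_namespace_new(obd, obd->obd_name,
				  LDLM_NAMESPACE_CLIENT,
				  LDLM_NAMESPACE_MODEST,
				  LDLM_NS_TYPE_MGC);
}
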
- */ - ldlm_set_cbpending(lock); - ldlm_set_failed(lock); - lock->l_flags |= flags; - - /* ... without sending a CANCEL message for local_only. */ - if (local_only) - ldlm_set_local_only(lock); - - if (local_only && (lock->l_readers || lock->l_writers)) { - /* This is a little bit gross, but much better than the - * alternative: pretend that we got a blocking AST from - * the server, so that when the lock is decref'd, it - * will go away ... - */ - unlock_res(res); - LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); - if (lock->l_flags & LDLM_FL_FAIL_LOC) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(4 * HZ); - set_current_state(TASK_RUNNING); - } - if (lock->l_completion_ast) - lock->l_completion_ast(lock, LDLM_FL_FAILED, - NULL); - LDLM_LOCK_RELEASE(lock); - continue; - } - - unlock_res(res); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh, LCF_LOCAL); - if (rc) - CERROR("ldlm_cli_cancel: %d\n", rc); - LDLM_LOCK_RELEASE(lock); - } while (1); -} - -static int ldlm_resource_clean(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - __u64 flags = *(__u64 *)arg; - - cleanup_resource(res, &res->lr_granted, flags); - cleanup_resource(res, &res->lr_waiting, flags); - - return 0; -} - -static int ldlm_resource_complain(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - - lock_res(res); - CERROR("%s: namespace resource " DLDLMRES - " (%p) refcount nonzero (%d) after lock cleanup; forcing cleanup.\n", - ldlm_ns_name(ldlm_res_to_ns(res)), PLDLMRES(res), res, - atomic_read(&res->lr_refcount) - 1); - - ldlm_resource_dump(D_ERROR, res); - unlock_res(res); - return 0; -} - -/** - * Cancel and destroy all locks in the namespace. - * - * Typically used during evictions when server notified client that it was - * evicted and all of its state needs to be destroyed. - * Also used during shutdown. - */ -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags) -{ - if (!ns) { - CDEBUG(D_INFO, "NULL ns, skipping cleanup\n"); - return ELDLM_OK; - } - - cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, - &flags, 0); - cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, - NULL, 0); - return ELDLM_OK; -} -EXPORT_SYMBOL(ldlm_namespace_cleanup); - -/** - * Attempts to free namespace. - * - * Only used when namespace goes away, like during an unmount. - */ -static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force) -{ - /* At shutdown time, don't call the cancellation callback */ - ldlm_namespace_cleanup(ns, force ? LDLM_FL_LOCAL_ONLY : 0); - - if (atomic_read(&ns->ns_bref) > 0) { - int rc; - - CDEBUG(D_DLMTRACE, - "dlm namespace %s free waiting on refcount %d\n", - ldlm_ns_name(ns), atomic_read(&ns->ns_bref)); -force_wait: - if (force) - rc = wait_event_idle_timeout(ns->ns_waitq, - atomic_read(&ns->ns_bref) == 0, - obd_timeout * HZ / 4) ? 0 : -ETIMEDOUT; - else - rc = l_wait_event_abortable(ns->ns_waitq, - atomic_read(&ns->ns_bref) == 0); - - /* Forced cleanups should be able to reclaim all references, - * so it's safe to wait forever... we can't leak locks... 
- */ - if (force && rc == -ETIMEDOUT) { - LCONSOLE_ERROR("Forced cleanup waiting for %s namespace with %d resources in use, (rc=%d)\n", - ldlm_ns_name(ns), - atomic_read(&ns->ns_bref), rc); - goto force_wait; - } - - if (atomic_read(&ns->ns_bref)) { - LCONSOLE_ERROR("Cleanup waiting for %s namespace with %d resources in use, (rc=%d)\n", - ldlm_ns_name(ns), - atomic_read(&ns->ns_bref), rc); - return ELDLM_NAMESPACE_EXISTS; - } - CDEBUG(D_DLMTRACE, "dlm namespace %s free done waiting\n", - ldlm_ns_name(ns)); - } - - return ELDLM_OK; -} - -/** - * Performs various cleanups for passed \a ns to make it drop refc and be - * ready for freeing. Waits for refc == 0. - * - * The following is done: - * (0) Unregister \a ns from its list to make inaccessible for potential - * users like pools thread and others; - * (1) Clear all locks in \a ns. - */ -void ldlm_namespace_free_prior(struct ldlm_namespace *ns, - struct obd_import *imp, - int force) -{ - int rc; - - if (!ns) - return; - - spin_lock(&ns->ns_lock); - ns->ns_stopping = 1; - spin_unlock(&ns->ns_lock); - - /* - * Can fail with -EINTR when force == 0 in which case try harder. - */ - rc = __ldlm_namespace_free(ns, force); - if (rc != ELDLM_OK) { - if (imp) { - ptlrpc_disconnect_import(imp, 0); - ptlrpc_invalidate_import(imp); - } - - /* - * With all requests dropped and the import inactive - * we are guaranteed all reference will be dropped. - */ - rc = __ldlm_namespace_free(ns, 1); - LASSERT(rc == 0); - } -} - -/** Unregister \a ns from the list of namespaces. */ -static void ldlm_namespace_unregister(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - mutex_lock(ldlm_namespace_lock(client)); - LASSERT(!list_empty(&ns->ns_list_chain)); - /* Some asserts and possibly other parts of the code are still - * using list_empty(&ns->ns_list_chain). This is why it is - * important to use list_del_init() here. - */ - list_del_init(&ns->ns_list_chain); - ldlm_namespace_nr_dec(client); - mutex_unlock(ldlm_namespace_lock(client)); -} - -/** - * Performs freeing memory structures related to \a ns. This is only done - * when ldlm_namespce_free_prior() successfully removed all resources - * referencing \a ns and its refc == 0. - */ -void ldlm_namespace_free_post(struct ldlm_namespace *ns) -{ - if (!ns) - return; - - /* Make sure that nobody can find this ns in its list. */ - ldlm_namespace_unregister(ns, ns->ns_client); - /* Fini pool _before_ parent proc dir is removed. This is important as - * ldlm_pool_fini() removes own proc dir which is child to @dir. - * Removing it after @dir may cause oops. - */ - ldlm_pool_fini(&ns->ns_pool); - - ldlm_namespace_debugfs_unregister(ns); - ldlm_namespace_sysfs_unregister(ns); - cfs_hash_putref(ns->ns_rs_hash); - /* Namespace \a ns should be not on list at this time, otherwise - * this will cause issues related to using freed \a ns in poold - * thread. - */ - LASSERT(list_empty(&ns->ns_list_chain)); - kfree(ns); - ldlm_put_ref(); -} - -void ldlm_namespace_get(struct ldlm_namespace *ns) -{ - atomic_inc(&ns->ns_bref); -} - -/* This is only for callers that care about refcount */ -static int ldlm_namespace_get_return(struct ldlm_namespace *ns) -{ - return atomic_inc_return(&ns->ns_bref); -} - -void ldlm_namespace_put(struct ldlm_namespace *ns) -{ - if (atomic_dec_and_lock(&ns->ns_bref, &ns->ns_lock)) { - wake_up(&ns->ns_waitq); - spin_unlock(&ns->ns_lock); - } -} - -/** Should be called with ldlm_namespace_lock(client) taken. 
*/ -void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - LASSERT(!list_empty(&ns->ns_list_chain)); - LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); - list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client)); -} - -/** Should be called with ldlm_namespace_lock(client) taken. */ -void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - LASSERT(!list_empty(&ns->ns_list_chain)); - LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); - list_move_tail(&ns->ns_list_chain, &ldlm_cli_inactive_namespace_list); -} - -/** Should be called with ldlm_namespace_lock(client) taken. */ -struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client) -{ - LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); - LASSERT(!list_empty(ldlm_namespace_list(client))); - return container_of(ldlm_namespace_list(client)->next, - struct ldlm_namespace, ns_list_chain); -} - -/** Create and initialize new resource. */ -static struct ldlm_resource *ldlm_resource_new(void) -{ - struct ldlm_resource *res; - int idx; - - res = kmem_cache_zalloc(ldlm_resource_slab, GFP_NOFS); - if (!res) - return NULL; - - INIT_LIST_HEAD(&res->lr_granted); - INIT_LIST_HEAD(&res->lr_waiting); - - /* Initialize interval trees for each lock mode. */ - for (idx = 0; idx < LCK_MODE_NUM; idx++) { - res->lr_itree[idx].lit_size = 0; - res->lr_itree[idx].lit_mode = 1 << idx; - res->lr_itree[idx].lit_root = NULL; - } - - atomic_set(&res->lr_refcount, 1); - spin_lock_init(&res->lr_lock); - lu_ref_init(&res->lr_reference); - - /* The creator of the resource must unlock the mutex after LVB - * initialization. - */ - mutex_init(&res->lr_lvb_mutex); - mutex_lock(&res->lr_lvb_mutex); - - return res; -} - -/** - * Return a reference to resource with given name, creating it if necessary. - * Args: namespace with ns_lock unlocked - * Locks: takes and releases NS hash-lock and res->lr_lock - * Returns: referenced, unlocked ldlm_resource or NULL - */ -struct ldlm_resource * -ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, - const struct ldlm_res_id *name, enum ldlm_type type, - int create) -{ - struct hlist_node *hnode; - struct ldlm_resource *res = NULL; - struct cfs_hash_bd bd; - __u64 version; - int ns_refcount = 0; - int rc; - - LASSERT(!parent); - LASSERT(ns->ns_rs_hash); - LASSERT(name->name[0] != 0); - - cfs_hash_bd_get_and_lock(ns->ns_rs_hash, (void *)name, &bd, 0); - hnode = cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name); - if (hnode) { - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0); - goto lvbo_init; - } - - version = cfs_hash_bd_version_get(&bd); - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0); - - if (create == 0) - return ERR_PTR(-ENOENT); - - LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE, - "type: %d\n", type); - res = ldlm_resource_new(); - if (!res) - return ERR_PTR(-ENOMEM); - - res->lr_ns_bucket = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd); - res->lr_name = *name; - res->lr_type = type; - - cfs_hash_bd_lock(ns->ns_rs_hash, &bd, 1); - hnode = (version == cfs_hash_bd_version_get(&bd)) ? NULL : - cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name); - - if (hnode) { - /* Someone won the race and already added the resource. */ - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1); - /* Clean lu_ref for failed resource. */ - lu_ref_fini(&res->lr_reference); - /* We have taken lr_lvb_mutex. Drop it. 
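
The lookup contract above (a referenced, unlocked resource, or an ERR_PTR on failure) keeps callers short; a hypothetical lookup-or-create of a plain-type resource:

static int touch_resource(struct ldlm_namespace *ns,
			  const struct ldlm_res_id *name)
{
	struct ldlm_resource *res;

	res = ldlm_resource_get(ns, NULL, name, LDLM_PLAIN, 1);
	if (IS_ERR(res))
		return PTR_ERR(res);
	/* ... use the resource under lock_res()/unlock_res() ... */
	ldlm_resource_putref(res);
	return 0;
}
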
*/ - mutex_unlock(&res->lr_lvb_mutex); - kmem_cache_free(ldlm_resource_slab, res); -lvbo_init: - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - /* Synchronize with regard to resource creation. */ - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) { - mutex_lock(&res->lr_lvb_mutex); - mutex_unlock(&res->lr_lvb_mutex); - } - - if (unlikely(res->lr_lvb_len < 0)) { - rc = res->lr_lvb_len; - ldlm_resource_putref(res); - res = ERR_PTR(rc); - } - return res; - } - /* We won! Let's add the resource. */ - cfs_hash_bd_add_locked(ns->ns_rs_hash, &bd, &res->lr_hash); - if (cfs_hash_bd_count_get(&bd) == 1) - ns_refcount = ldlm_namespace_get_return(ns); - - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1); - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) { - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2); - rc = ns->ns_lvbo->lvbo_init(res); - if (rc < 0) { - CERROR("%s: lvbo_init failed for resource %#llx:%#llx: rc = %d\n", - ns->ns_obd->obd_name, name->name[0], - name->name[1], rc); - res->lr_lvb_len = rc; - mutex_unlock(&res->lr_lvb_mutex); - ldlm_resource_putref(res); - return ERR_PTR(rc); - } - } - - /* We create resource with locked lr_lvb_mutex. */ - mutex_unlock(&res->lr_lvb_mutex); - - /* Let's see if we happened to be the very first resource in this - * namespace. If so, and this is a client namespace, we need to move - * the namespace into the active namespaces list to be patrolled by - * the ldlm_poold. - */ - if (ns_refcount == 1) { - mutex_lock(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - ldlm_namespace_move_to_active_locked(ns, LDLM_NAMESPACE_CLIENT); - mutex_unlock(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - } - - return res; -} -EXPORT_SYMBOL(ldlm_resource_get); - -static void __ldlm_resource_putref_final(struct cfs_hash_bd *bd, - struct ldlm_resource *res) -{ - struct ldlm_ns_bucket *nsb = res->lr_ns_bucket; - - if (!list_empty(&res->lr_granted)) { - ldlm_resource_dump(D_ERROR, res); - LBUG(); - } - - if (!list_empty(&res->lr_waiting)) { - ldlm_resource_dump(D_ERROR, res); - LBUG(); - } - - cfs_hash_bd_del_locked(nsb->nsb_namespace->ns_rs_hash, - bd, &res->lr_hash); - lu_ref_fini(&res->lr_reference); - if (cfs_hash_bd_count_get(bd) == 0) - ldlm_namespace_put(nsb->nsb_namespace); -} - -/* Returns 1 if the resource was freed, 0 if it remains. */ -int ldlm_resource_putref(struct ldlm_resource *res) -{ - struct ldlm_namespace *ns = ldlm_res_to_ns(res); - struct cfs_hash_bd bd; - - LASSERT_ATOMIC_GT_LT(&res->lr_refcount, 0, LI_POISON); - CDEBUG(D_INFO, "putref res: %p count: %d\n", - res, atomic_read(&res->lr_refcount) - 1); - - cfs_hash_bd_get(ns->ns_rs_hash, &res->lr_name, &bd); - if (cfs_hash_bd_dec_and_lock(ns->ns_rs_hash, &bd, &res->lr_refcount)) { - __ldlm_resource_putref_final(&bd, res); - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1); - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_free) - ns->ns_lvbo->lvbo_free(res); - kmem_cache_free(ldlm_resource_slab, res); - return 1; - } - return 0; -} -EXPORT_SYMBOL(ldlm_resource_putref); - -/** - * Add a lock into a given resource into specified lock list. 
 - */ -void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, - struct ldlm_lock *lock) -{ - check_res_locked(res); - - LDLM_DEBUG(lock, "About to add this lock:"); - - if (ldlm_is_destroyed(lock)) { - CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); - return; - } - - LASSERT(list_empty(&lock->l_res_link)); - - list_add_tail(&lock->l_res_link, head); -} - -void ldlm_resource_unlink_lock(struct ldlm_lock *lock) -{ - int type = lock->l_resource->lr_type; - - check_res_locked(lock->l_resource); - if (type == LDLM_IBITS || type == LDLM_PLAIN) - ldlm_unlink_lock_skiplist(lock); - else if (type == LDLM_EXTENT) - ldlm_extent_unlink_lock(lock); - list_del_init(&lock->l_res_link); -} -EXPORT_SYMBOL(ldlm_resource_unlink_lock); - -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc) -{ - desc->lr_type = res->lr_type; - desc->lr_name = res->lr_name; -} - -/** - * Print information about all locks in all namespaces on this node to debug - * log. - */ -void ldlm_dump_all_namespaces(enum ldlm_side client, int level) -{ - struct ldlm_namespace *ns; - - if (!((libcfs_debug | D_ERROR) & level)) - return; - - mutex_lock(ldlm_namespace_lock(client)); - - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) - ldlm_namespace_dump(level, ns); - - mutex_unlock(ldlm_namespace_lock(client)); -} - -static int ldlm_res_hash_dump(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - int level = (int)(unsigned long)arg; - - lock_res(res); - ldlm_resource_dump(level, res); - unlock_res(res); - - return 0; -} - -/** - * Print information about all locks in this namespace on this node to debug - * log. - */ -void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) -{ - if (!((libcfs_debug | D_ERROR) & level)) - return; - - CDEBUG(level, "--- Namespace: %s (rc: %d, side: client)\n", - ldlm_ns_name(ns), atomic_read(&ns->ns_bref)); - - if (time_before(cfs_time_current(), ns->ns_next_dump)) - return; - - cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_res_hash_dump, - (void *)(unsigned long)level, 0); - spin_lock(&ns->ns_lock); - ns->ns_next_dump = cfs_time_shift(10); - spin_unlock(&ns->ns_lock); -} - -/** - * Print information about all locks in this resource to debug log. - */ -void ldlm_resource_dump(int level, struct ldlm_resource *res) -{ - struct ldlm_lock *lock; - unsigned int granted = 0; - - BUILD_BUG_ON(RES_NAME_SIZE != 4); - - if (!((libcfs_debug | D_ERROR) & level)) - return; - - CDEBUG(level, "--- Resource: " DLDLMRES " (%p) refcount = %d\n", - PLDLMRES(res), res, atomic_read(&res->lr_refcount)); - - if (!list_empty(&res->lr_granted)) { - CDEBUG(level, "Granted locks (in reverse order):\n"); - list_for_each_entry_reverse(lock, &res->lr_granted, - l_res_link) { - LDLM_DEBUG_LIMIT(level, lock, "###"); - if (!(level & D_CANTMASK) && - ++granted > ldlm_dump_granted_max) { - CDEBUG(level, - "only dump %d granted locks to avoid DDOS.\n", - granted); - break; - } - } - } - if (!list_empty(&res->lr_waiting)) { - CDEBUG(level, "Waiting locks:\n"); - list_for_each_entry(lock, &res->lr_waiting, l_res_link) - LDLM_DEBUG_LIMIT(level, lock, "###"); - } -} -EXPORT_SYMBOL(ldlm_resource_dump);
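
Taken together, the dump routines above give a one-call view of client lock state; a hypothetical debugging helper:

static void dump_client_dlm_state(void)
{
	/* Per-namespace output is rate-limited via ns_next_dump (10 s). */
	ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
}
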