Diffstat (limited to 'drivers/staging/lustre/lustre/obdclass/lu_object.c')
-rw-r--r--  drivers/staging/lustre/lustre/obdclass/lu_object.c  240
1 file changed, 176 insertions(+), 64 deletions(-)
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 9b03059f34d6..054e567e6c8d 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -55,6 +55,34 @@
#include "../include/lu_ref.h"
#include <linux/list.h>
+enum {
+ LU_CACHE_PERCENT_MAX = 50,
+ LU_CACHE_PERCENT_DEFAULT = 20
+};
+
+#define LU_CACHE_NR_MAX_ADJUST 128
+#define LU_CACHE_NR_UNLIMITED -1
+#define LU_CACHE_NR_DEFAULT LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_LDISKFS_LIMIT LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_ZFS_LIMIT 256
+
+#define LU_SITE_BITS_MIN 12
+#define LU_SITE_BITS_MAX 24
+/**
+ * Total 256 buckets: we don't want too many buckets because they
+ * - consume too much memory
+ * - lead to unbalanced LRU lists
+ */
+#define LU_SITE_BKT_BITS 8
+
+static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
+module_param(lu_cache_percent, int, 0644);
+MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
+
+static long lu_cache_nr = LU_CACHE_NR_DEFAULT;
+module_param(lu_cache_nr, long, 0644);
+MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
+
static void lu_object_free(const struct lu_env *env, struct lu_object *o);
static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
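For context (not part of the patch): lu_cache_percent appears to feed the hash-order calculation in lu_htable_order() further down in this file. A minimal, self-contained sketch of that kind of calculation follows; sketch_htable_order(), total_pages and page_size are illustrative stand-ins, and the roughly-1-KiB-per-object assumption is the sketch's own.

static unsigned long sketch_htable_order(unsigned long total_pages,
					 unsigned long page_size,
					 unsigned int percent)
{
	unsigned long cache_size;
	unsigned long bits;

	/* Fall back to the default if the module parameter is unreasonable. */
	if (percent == 0 || percent > LU_CACHE_PERCENT_MAX)
		percent = LU_CACHE_PERCENT_DEFAULT;

	/* Take the requested share of memory, assuming roughly 1 KiB per
	 * cached object (an assumption of this sketch, not a statement
	 * about the real lu_object footprint).
	 */
	cache_size = total_pages / 100 * percent * (page_size / 1024);

	/* Smallest power of two that covers the target object count. */
	for (bits = 1; (1UL << bits) < cache_size; ++bits)
		;

	return bits;
}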
@@ -310,10 +338,10 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
struct cfs_hash_bd bd2;
struct list_head dispose;
int did_sth;
- int start;
+ unsigned int start;
int count;
int bnr;
- int i;
+ unsigned int i;
if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU))
return 0;
@@ -324,8 +352,13 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
* the dispose list, removing them from LRU and hash table.
*/
start = s->ls_purge_start;
- bnr = (nr == ~0) ? -1 : nr / CFS_HASH_NBKT(s->ls_obj_hash) + 1;
+ bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1;
again:
+	/*
+	 * It doesn't make sense to run purge threads in parallel; that can
+	 * only cause trouble. See LU-5331.
+	 */
+ mutex_lock(&s->ls_purge_mutex);
did_sth = 0;
cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
if (i < start)
@@ -371,6 +404,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
if (nr == 0)
break;
}
+ mutex_unlock(&s->ls_purge_mutex);
if (nr != 0 && did_sth && start != 0) {
start = 0; /* restart from the first bucket */
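Note (not part of the patch): the locking above relies on a new ls_purge_mutex member in struct lu_site; that header change is not shown in this diff. A hypothetical sketch of its shape, built around members the code here already references (ls_obj_hash, ls_purge_start):

struct lu_site {
	/* ... existing members ... */
	struct cfs_hash		*ls_obj_hash;		/* hash table of all objects */
	struct mutex		 ls_purge_mutex;	/* serializes lu_site_purge(), see LU-5331 */
	unsigned int		 ls_purge_start;	/* bucket to resume purging from */
	/* ... */
};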
@@ -573,6 +607,27 @@ static struct lu_object *lu_object_find(const struct lu_env *env,
return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf);
}
+/*
+ * Limit the lu_object cache to a maximum of lu_cache_nr objects. Because
+ * the calculation for the number of objects to reclaim is not covered by
+ * a lock, the maximum number of objects is capped by LU_CACHE_NR_MAX_ADJUST.
+ * This ensures that many concurrent threads will not accidentally purge
+ * the entire cache.
+ */
+static void lu_object_limit(const struct lu_env *env, struct lu_device *dev)
+{
+ __u64 size, nr;
+
+ if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
+ return;
+
+ size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
+ nr = (__u64)lu_cache_nr;
+ if (size > nr)
+ lu_site_purge(env, dev->ld_site,
+ min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST));
+}
+
static struct lu_object *lu_object_new(const struct lu_env *env,
struct lu_device *dev,
const struct lu_fid *f,
@@ -590,6 +645,9 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
cfs_hash_bd_unlock(hs, &bd, 1);
+
+ lu_object_limit(env, dev);
+
return o;
}
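Worked example (illustrative numbers, not from the patch): with lu_cache_nr = 256 and 10000 objects in the hash, size - nr = 9744, so a single call to lu_object_limit() purges only min_t(__u64, 9744, LU_CACHE_NR_MAX_ADJUST) = 128 objects; concurrent allocators trim the cache in small steps rather than draining it. The per-call amount in isolation, as a sketch:

/* Sketch of the per-call reclaim amount computed by lu_object_limit(). */
static __u64 sketch_purge_amount(__u64 size, __u64 nr)
{
	return size > nr ? min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST) : 0;
}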
@@ -656,6 +714,9 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
if (likely(PTR_ERR(shadow) == -ENOENT)) {
cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
cfs_hash_bd_unlock(hs, &bd, 1);
+
+ lu_object_limit(env, dev);
+
return o;
}
@@ -706,13 +767,15 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
struct lu_object *obj;
top = lu_object_find(env, dev, f, conf);
- if (!IS_ERR(top)) {
- obj = lu_object_locate(top->lo_header, dev->ld_type);
- if (!obj)
- lu_object_put(env, top);
- } else {
- obj = top;
+ if (IS_ERR(top))
+ return top;
+
+ obj = lu_object_locate(top->lo_header, dev->ld_type);
+ if (unlikely(!obj)) {
+ lu_object_put(env, top);
+ obj = ERR_PTR(-ENOENT);
}
+
return obj;
}
EXPORT_SYMBOL(lu_object_find_slice);
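Note (not part of the patch): after this rewrite a missing slice comes back as ERR_PTR(-ENOENT) rather than NULL, so callers only need an IS_ERR() check. A hypothetical caller written against the new contract, as a sketch:

static int sketch_use_slice(const struct lu_env *env, struct lu_device *dev,
			    const struct lu_fid *fid)
{
	struct lu_object *obj;

	obj = lu_object_find_slice(env, dev, fid, NULL);
	if (IS_ERR(obj))
		return PTR_ERR(obj);	/* -ENOENT covers the missing-slice case */

	/* ... use the slice ... */

	lu_object_put(env, obj);
	return 0;
}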
@@ -726,34 +789,31 @@ int lu_device_type_init(struct lu_device_type *ldt)
{
int result = 0;
+ atomic_set(&ldt->ldt_device_nr, 0);
INIT_LIST_HEAD(&ldt->ldt_linkage);
if (ldt->ldt_ops->ldto_init)
result = ldt->ldt_ops->ldto_init(ldt);
- if (result == 0)
+
+ if (!result) {
+ spin_lock(&obd_types_lock);
list_add(&ldt->ldt_linkage, &lu_device_types);
+ spin_unlock(&obd_types_lock);
+ }
+
return result;
}
EXPORT_SYMBOL(lu_device_type_init);
void lu_device_type_fini(struct lu_device_type *ldt)
{
+ spin_lock(&obd_types_lock);
list_del_init(&ldt->ldt_linkage);
+ spin_unlock(&obd_types_lock);
if (ldt->ldt_ops->ldto_fini)
ldt->ldt_ops->ldto_fini(ldt);
}
EXPORT_SYMBOL(lu_device_type_fini);
-void lu_types_stop(void)
-{
- struct lu_device_type *ldt;
-
- list_for_each_entry(ldt, &lu_device_types, ldt_linkage) {
- if (ldt->ldt_device_nr == 0 && ldt->ldt_ops->ldto_stop)
- ldt->ldt_ops->ldto_stop(ldt);
- }
-}
-EXPORT_SYMBOL(lu_types_stop);
-
/**
* Global list of all sites on this node
*/
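Note (not part of the patch): the atomic_set()/atomic_inc_return()/atomic_dec_and_test() usage assumes ldt_device_nr in struct lu_device_type becomes an atomic_t; that header change is outside this diff. A hypothetical sketch of the affected part of the structure:

struct lu_device_type {
	/* ... */
	const struct lu_device_type_operations	*ldt_ops;	/* ldto_init/ldto_start/ldto_stop/... */
	atomic_t				 ldt_device_nr;	/* was __u32, now updated without a lock */
	struct list_head			 ldt_linkage;	/* entry on lu_device_types */
	/* ... */
};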
@@ -808,22 +868,14 @@ void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
}
EXPORT_SYMBOL(lu_site_print);
-enum {
- LU_CACHE_PERCENT_MAX = 50,
- LU_CACHE_PERCENT_DEFAULT = 20
-};
-
-static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
-module_param(lu_cache_percent, int, 0644);
-MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
-
/**
* Return desired hash table order.
*/
-static int lu_htable_order(void)
+static unsigned long lu_htable_order(struct lu_device *top)
{
+ unsigned long bits_max = LU_SITE_BITS_MAX;
unsigned long cache_size;
- int bits;
+ unsigned long bits;
/*
* Calculate hash table size, assuming that we want reasonable
@@ -854,7 +906,7 @@ static int lu_htable_order(void)
for (bits = 1; (1 << bits) < cache_size; ++bits) {
;
}
- return bits;
+ return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max);
}
static unsigned lu_obj_hop_hash(struct cfs_hash *hs,
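Worked example (values illustrative): with LU_SITE_BITS_MIN == 12 and bits_max == LU_SITE_BITS_MAX == 24, the clamp above gives

	clamp_t(unsigned long,  9, LU_SITE_BITS_MIN, bits_max) == 12
	clamp_t(unsigned long, 30, LU_SITE_BITS_MIN, bits_max) == 24

which is the min(max(bits, LU_SITE_BITS_MIN), bits_max) expression that the old lu_site_init() open-coded.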
@@ -930,28 +982,18 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
/**
* Initialize site \a s, with \a d as the top level device.
*/
-#define LU_SITE_BITS_MIN 12
-#define LU_SITE_BITS_MAX 19
-/**
- * total 256 buckets, we don't want too many buckets because:
- * - consume too much memory
- * - avoid unbalanced LRU list
- */
-#define LU_SITE_BKT_BITS 8
-
int lu_site_init(struct lu_site *s, struct lu_device *top)
{
struct lu_site_bkt_data *bkt;
struct cfs_hash_bd bd;
+ unsigned long bits;
+ unsigned long i;
char name[16];
- int bits;
- int i;
memset(s, 0, sizeof(*s));
- bits = lu_htable_order();
- snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
- for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX);
- bits >= LU_SITE_BITS_MIN; bits--) {
+ mutex_init(&s->ls_purge_mutex);
+ snprintf(name, sizeof(name), "lu_site_%s", top->ld_type->ldt_name);
+ for (bits = lu_htable_order(top); bits >= LU_SITE_BITS_MIN; bits--) {
s->ls_obj_hash = cfs_hash_create(name, bits, bits,
bits - LU_SITE_BKT_BITS,
sizeof(*bkt), 0, 0,
@@ -959,13 +1001,14 @@ int lu_site_init(struct lu_site *s, struct lu_device *top)
CFS_HASH_SPIN_BKTLOCK |
CFS_HASH_NO_ITEMREF |
CFS_HASH_DEPTH |
- CFS_HASH_ASSERT_EMPTY);
+ CFS_HASH_ASSERT_EMPTY |
+ CFS_HASH_COUNTER);
if (s->ls_obj_hash)
break;
}
if (!s->ls_obj_hash) {
- CERROR("failed to create lu_site hash with bits: %d\n", bits);
+ CERROR("failed to create lu_site hash with bits: %lu\n", bits);
return -ENOMEM;
}
@@ -1082,8 +1125,10 @@ EXPORT_SYMBOL(lu_device_put);
*/
int lu_device_init(struct lu_device *d, struct lu_device_type *t)
{
- if (t->ldt_device_nr++ == 0 && t->ldt_ops->ldto_start)
+ if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
+ t->ldt_ops->ldto_start)
t->ldt_ops->ldto_start(t);
+
memset(d, 0, sizeof(*d));
atomic_set(&d->ld_ref, 0);
d->ld_type = t;
@@ -1098,9 +1143,8 @@ EXPORT_SYMBOL(lu_device_init);
*/
void lu_device_fini(struct lu_device *d)
{
- struct lu_device_type *t;
+ struct lu_device_type *t = d->ld_type;
- t = d->ld_type;
if (d->ld_obd) {
d->ld_obd->obd_lu_dev = NULL;
d->ld_obd = NULL;
@@ -1109,8 +1153,10 @@ void lu_device_fini(struct lu_device *d)
lu_ref_fini(&d->ld_reference);
LASSERTF(atomic_read(&d->ld_ref) == 0,
"Refcount is %u\n", atomic_read(&d->ld_ref));
- LASSERT(t->ldt_device_nr > 0);
- if (--t->ldt_device_nr == 0 && t->ldt_ops->ldto_stop)
+ LASSERT(atomic_read(&t->ldt_device_nr) > 0);
+
+ if (atomic_dec_and_test(&t->ldt_device_nr) &&
+ t->ldt_ops->ldto_stop)
t->ldt_ops->ldto_stop(t);
}
EXPORT_SYMBOL(lu_device_fini);
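Note (not part of the patch): atomic_inc_return() == 1 together with atomic_dec_and_test() is the usual first-user/last-user pattern: ldto_start() runs once when the first device of a type appears, ldto_stop() once when the last one is finalized. A generic sketch of the pattern with hypothetical names:

static atomic_t sketch_users = ATOMIC_INIT(0);

static void sketch_get(void)
{
	/* only the caller that takes the count from 0 to 1 does the start-up */
	if (atomic_inc_return(&sketch_users) == 1)
		pr_info("sketch: starting shared state\n");
}

static void sketch_put(void)
{
	/* only the caller that drops the count to 0 does the tear-down */
	if (atomic_dec_and_test(&sketch_users))
		pr_info("sketch: stopping shared state\n");
}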
@@ -1254,7 +1300,6 @@ void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
}
}
}
-EXPORT_SYMBOL(lu_stack_fini);
enum {
/**
@@ -1281,7 +1326,7 @@ static unsigned key_set_version;
int lu_context_key_register(struct lu_context_key *key)
{
int result;
- int i;
+ unsigned int i;
LASSERT(key->lct_init);
LASSERT(key->lct_fini);
@@ -1476,18 +1521,16 @@ void lu_context_key_quiesce(struct lu_context_key *key)
++key_set_version;
}
}
-EXPORT_SYMBOL(lu_context_key_quiesce);
void lu_context_key_revive(struct lu_context_key *key)
{
key->lct_tags &= ~LCT_QUIESCENT;
++key_set_version;
}
-EXPORT_SYMBOL(lu_context_key_revive);
static void keys_fini(struct lu_context *ctx)
{
- int i;
+ unsigned int i;
if (!ctx->lc_value)
return;
@@ -1501,7 +1544,7 @@ static void keys_fini(struct lu_context *ctx)
static int keys_fill(struct lu_context *ctx)
{
- int i;
+ unsigned int i;
LINVRNT(ctx->lc_value);
for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
@@ -1614,7 +1657,7 @@ EXPORT_SYMBOL(lu_context_enter);
*/
void lu_context_exit(struct lu_context *ctx)
{
- int i;
+ unsigned int i;
LINVRNT(ctx->lc_state == LCS_ENTERED);
ctx->lc_state = LCS_LEFT;
@@ -1642,7 +1685,6 @@ int lu_context_refill(struct lu_context *ctx)
{
return likely(ctx->lc_version == key_set_version) ? 0 : keys_fill(ctx);
}
-EXPORT_SYMBOL(lu_context_refill);
/**
* lu_ctx_tags/lu_ses_tags will be updated if there are new types of
@@ -1696,7 +1738,7 @@ static void lu_site_stats_get(struct cfs_hash *hs,
struct lu_site_stats *stats, int populated)
{
struct cfs_hash_bd bd;
- int i;
+ unsigned int i;
cfs_hash_for_each_bucket(hs, &bd, i) {
struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
@@ -1940,3 +1982,73 @@ void lu_kmem_fini(struct lu_kmem_descr *caches)
}
}
EXPORT_SYMBOL(lu_kmem_fini);
+
+void lu_buf_free(struct lu_buf *buf)
+{
+ LASSERT(buf);
+ if (buf->lb_buf) {
+ LASSERT(buf->lb_len > 0);
+ kvfree(buf->lb_buf);
+ buf->lb_buf = NULL;
+ buf->lb_len = 0;
+ }
+}
+EXPORT_SYMBOL(lu_buf_free);
+
+void lu_buf_alloc(struct lu_buf *buf, size_t size)
+{
+ LASSERT(buf);
+ LASSERT(!buf->lb_buf);
+ LASSERT(!buf->lb_len);
+ buf->lb_buf = libcfs_kvzalloc(size, GFP_NOFS);
+ if (likely(buf->lb_buf))
+ buf->lb_len = size;
+}
+EXPORT_SYMBOL(lu_buf_alloc);
+
+void lu_buf_realloc(struct lu_buf *buf, size_t size)
+{
+ lu_buf_free(buf);
+ lu_buf_alloc(buf, size);
+}
+EXPORT_SYMBOL(lu_buf_realloc);
+
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len)
+{
+ if (!buf->lb_buf && !buf->lb_len)
+ lu_buf_alloc(buf, len);
+
+ if ((len > buf->lb_len) && buf->lb_buf)
+ lu_buf_realloc(buf, len);
+
+ return buf;
+}
+EXPORT_SYMBOL(lu_buf_check_and_alloc);
+
+/**
+ * Increase the size of \a buf.
+ * Preserves the old data in the buffer; the old buffer remains
+ * unchanged on error.
+ * \retval 0 or -ENOMEM
+ */
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len)
+{
+ char *ptr;
+
+ if (len <= buf->lb_len)
+ return 0;
+
+ ptr = libcfs_kvzalloc(len, GFP_NOFS);
+ if (!ptr)
+ return -ENOMEM;
+
+	/* Copy the old contents over, then free the old buffer */
+ if (buf->lb_buf) {
+ memcpy(ptr, buf->lb_buf, buf->lb_len);
+ kvfree(buf->lb_buf);
+ }
+
+ buf->lb_buf = ptr;
+ buf->lb_len = len;
+ return 0;
+}
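Note (not part of the patch): a hypothetical caller of the new helpers, showing lu_buf_check_and_grow() enlarging a buffer while keeping its existing contents; sketch only, with error handling kept minimal:

static int sketch_append(struct lu_buf *buf, const void *rec, size_t rec_len,
			 size_t used)
{
	int rc;

	/* grow if needed; the first 'used' bytes survive the reallocation */
	rc = lu_buf_check_and_grow(buf, used + rec_len);
	if (rc)
		return rc;	/* the old buffer is still intact here */

	memcpy((char *)buf->lb_buf + used, rec, rec_len);
	return 0;
}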