staging: add Lustre file system client support

Lustre is the most deployed distributed file system in the HPC (High Performance Computing) world. The patch adds its client side support. The code is not very clean and needs to live in drivers/staging for some time for continuing cleanup work. See drivers/staging/lustre/TODO for details. The code is based on Lustre master commit faefbfc04 commit faefbfc0460bc00f2ee4c1c1c86aa1e39b9eea49 Author: Alex Zhuravlev <alexey.zhuravlev@intel.com> Date: Tue Apr 30 23:05:21 2013 +0400 LU-3244 utils: tunefs.lustre should preserve virgin label Plus a few under-review patches on Whamcloud gerrit: 3.8 kernel support: http://review.whamcloud.com/#change,5973 http://review.whamcloud.com/#change,5974 http://review.whamcloud.com/#change,5768 http://review.whamcloud.com/#change,5781 http://review.whamcloud.com/#change,5763 http://review.whamcloud.com/#change,5613 http://review.whamcloud.com/#change,5655 3.9 kernel support: http://review.whamcloud.com/#change,5898 http://review.whamcloud.com/#change,5899 Kconfig/Kbuild: http://review.whamcloud.com/#change,4646 http://review.whamcloud.com/#change,4644 libcfs cleanup: http://review.whamcloud.com/#change,2831 http://review.whamcloud.com/#change,4775 http://review.whamcloud.com/#change,4776 http://review.whamcloud.com/#change,4777 http://review.whamcloud.com/#change,4778 http://review.whamcloud.com/#change,4779 http://review.whamcloud.com/#change,4780 All starting/trailing whitespaces are removed, to match kernel coding style. Also ran scripts/cleanfile on all lustre source files. [maked the Kconfig depend on BROKEN as the recent procfs changes causes this to fail - gregkh] Signed-off-by: Peng Tao <tao.peng@emc.com> Signed-off-by: Andreas Dilger <andreas.dilger@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Peng Tao <bergwolf@gmail.com> 2013-05-02 16:46:55 +0800
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2013-05-14 13:54:50 -0400
commit: d7e09d0397e84eefbabfd9cb353221f3c6448d83 (patch)
tree: caeeaffa2967acfe0988dc65cf04089c0a27c7aa /drivers/staging/lustre/lustre/fld/fld_cache.c
parent: staging: vt6656: remove unused definitions (diff)
download: linux-dev-d7e09d0397e84eefbabfd9cb353221f3c6448d83.tar.xz
linux-dev-d7e09d0397e84eefbabfd9cb353221f3c6448d83.zip
1 files changed, 566 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c
new file mode 100644
index 000000000000..347f2ae83bc8
--- /dev/null
+++ b/drivers/staging/lustre/lustre/fld/fld_cache.c
@@ -0,0 +1,566 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_cache.c
+ *
+ * FLD (Fids Location Database)
+ *
+ * Author: Pravin Shelar <pravin.shelar@sun.com>
+ * Author: Yury Umanets <umka@clusterfs.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_FLD
+
+# include <linux/libcfs/libcfs.h>
+# include <linux/module.h>
+# include <linux/jbd.h>
+# include <asm/div64.h>
+
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <dt_object.h>
+#include <md_object.h>
+#include <lustre_req_layout.h>
+#include <lustre_fld.h>
+#include "fld_internal.h"
+
+/**
+ * create fld cache.
+ */
+struct fld_cache *fld_cache_init(const char *name,
+				 int cache_size, int cache_threshold)
+{
+	struct fld_cache *cache;
+	ENTRY;
+
+	LASSERT(name != NULL);
+	LASSERT(cache_threshold < cache_size);
+
+	OBD_ALLOC_PTR(cache);
+	if (cache == NULL)
+		RETURN(ERR_PTR(-ENOMEM));
+
+	INIT_LIST_HEAD(&cache->fci_entries_head);
+	INIT_LIST_HEAD(&cache->fci_lru);
+
+	cache->fci_cache_count = 0;
+	rwlock_init(&cache->fci_lock);
+
+	strlcpy(cache->fci_name, name,
+		sizeof(cache->fci_name));
+
+	cache->fci_cache_size = cache_size;
+	cache->fci_threshold = cache_threshold;
+
+	/* Init fld cache info. */
+	memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));
+
+	CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
+	       cache->fci_name, cache_size, cache_threshold);
+
+	RETURN(cache);
+}
+
+/**
+ * destroy fld cache.
+ */
+void fld_cache_fini(struct fld_cache *cache)
+{
+	__u64 pct;
+	ENTRY;
+
+	LASSERT(cache != NULL);
+	fld_cache_flush(cache);
+
+	if (cache->fci_stat.fst_count > 0) {
+		pct = cache->fci_stat.fst_cache * 100;
+		do_div(pct, cache->fci_stat.fst_count);
+	} else {
+		pct = 0;
+	}
+
+	CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
+	CDEBUG(D_INFO, "  Total reqs: "LPU64"\n", cache->fci_stat.fst_count);
+	CDEBUG(D_INFO, "  Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache);
+	CDEBUG(D_INFO, "  Cache hits: "LPU64"%%\n", pct);
+
+	OBD_FREE_PTR(cache);
+
+	EXIT;
+}
+
+/**
+ * delete given node from list.
+ */
+void fld_cache_entry_delete(struct fld_cache *cache,
+			    struct fld_cache_entry *node)
+{
+	list_del(&node->fce_list);
+	list_del(&node->fce_lru);
+	cache->fci_cache_count--;
+	OBD_FREE_PTR(node);
+}
+
+/**
+ * fix list by checking new entry with NEXT entry in order.
+ */
+static void fld_fix_new_list(struct fld_cache *cache)
+{
+	struct fld_cache_entry *f_curr;
+	struct fld_cache_entry *f_next;
+	struct lu_seq_range *c_range;
+	struct lu_seq_range *n_range;
+	struct list_head *head = &cache->fci_entries_head;
+	ENTRY;
+
+restart_fixup:
+
+	list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
+		c_range = &f_curr->fce_range;
+		n_range = &f_next->fce_range;
+
+		LASSERT(range_is_sane(c_range));
+		if (&f_next->fce_list == head)
+			break;
+
+		if (c_range->lsr_flags != n_range->lsr_flags)
+			continue;
+
+		LASSERTF(c_range->lsr_start <= n_range->lsr_start,
+			 "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
+			 PRANGE(c_range), PRANGE(n_range));
+
+		/* check merge possibility with next range */
+		if (c_range->lsr_end == n_range->lsr_start) {
+			if (c_range->lsr_index != n_range->lsr_index)
+				continue;
+			n_range->lsr_start = c_range->lsr_start;
+			fld_cache_entry_delete(cache, f_curr);
+			continue;
+		}
+
+		/* check if current range overlaps with next range. */
+		if (n_range->lsr_start < c_range->lsr_end) {
+			if (c_range->lsr_index == n_range->lsr_index) {
+				n_range->lsr_start = c_range->lsr_start;
+				n_range->lsr_end = max(c_range->lsr_end,
+						       n_range->lsr_end);
+				fld_cache_entry_delete(cache, f_curr);
+			} else {
+				if (n_range->lsr_end <= c_range->lsr_end) {
+					*n_range = *c_range;
+					fld_cache_entry_delete(cache, f_curr);
+				} else
+					n_range->lsr_start = c_range->lsr_end;
+			}
+
+			/* we could have overlap over next
+			 * range too. better restart. */
+			goto restart_fixup;
+		}
+
+		/* kill duplicates */
+		if (c_range->lsr_start == n_range->lsr_start &&
+		    c_range->lsr_end == n_range->lsr_end)
+			fld_cache_entry_delete(cache, f_curr);
+	}
+
+	EXIT;
+}
+
+/**
+ * add node to fld cache
+ */
+static inline void fld_cache_entry_add(struct fld_cache *cache,
+				       struct fld_cache_entry *f_new,
+				       struct list_head *pos)
+{
+	list_add(&f_new->fce_list, pos);
+	list_add(&f_new->fce_lru, &cache->fci_lru);
+
+	cache->fci_cache_count++;
+	fld_fix_new_list(cache);
+}
+
+/**
+ * Check if cache needs to be shrunk. If so - do it.
+ * Remove one entry in list and so on until cache is shrunk enough.
+ */
+static int fld_cache_shrink(struct fld_cache *cache)
+{
+	struct fld_cache_entry *flde;
+	struct list_head *curr;
+	int num = 0;
+	ENTRY;
+
+	LASSERT(cache != NULL);
+
+	if (cache->fci_cache_count < cache->fci_cache_size)
+		RETURN(0);
+
+	curr = cache->fci_lru.prev;
+
+	while (cache->fci_cache_count + cache->fci_threshold >
+	       cache->fci_cache_size && curr != &cache->fci_lru) {
+
+		flde = list_entry(curr, struct fld_cache_entry, fce_lru);
+		curr = curr->prev;
+		fld_cache_entry_delete(cache, flde);
+		num++;
+	}
+
+	CDEBUG(D_INFO, "%s: FLD cache - Shrunk by "
+	       "%d entries\n", cache->fci_name, num);
+
+	RETURN(0);
+}
+
+/**
+ * kill all fld cache entries.
+ */
+void fld_cache_flush(struct fld_cache *cache)
+{
+	ENTRY;
+
+	write_lock(&cache->fci_lock);
+	cache->fci_cache_size = 0;
+	fld_cache_shrink(cache);
+	write_unlock(&cache->fci_lock);
+
+	EXIT;
+}
+
+/**
+ * punch hole in existing range. divide this range and add new
+ * entry accordingly.
+ */
+
+void fld_cache_punch_hole(struct fld_cache *cache,
+			  struct fld_cache_entry *f_curr,
+			  struct fld_cache_entry *f_new)
+{
+	const struct lu_seq_range *range = &f_new->fce_range;
+	const seqno_t new_start  = range->lsr_start;
+	const seqno_t new_end  = range->lsr_end;
+	struct fld_cache_entry *fldt;
+
+	ENTRY;
+	OBD_ALLOC_GFP(fldt, sizeof *fldt, GFP_ATOMIC);
+	if (!fldt) {
+		OBD_FREE_PTR(f_new);
+		EXIT;
+		/* overlap is not allowed, so dont mess up list. */
+		return;
+	}
+	/*  break f_curr RANGE into three RANGES:
+	 *	f_curr, f_new , fldt
+	 */
+
+	/* f_new = *range */
+
+	/* fldt */
+	fldt->fce_range.lsr_start = new_end;
+	fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
+	fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;
+
+	/* f_curr */
+	f_curr->fce_range.lsr_end = new_start;
+
+	/* add these two entries to list */
+	fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
+	fld_cache_entry_add(cache, fldt, &f_new->fce_list);
+
+	/* no need to fixup */
+	EXIT;
+}
+
+/**
+ * handle range overlap in fld cache.
+ */
+static void fld_cache_overlap_handle(struct fld_cache *cache,
+				struct fld_cache_entry *f_curr,
+				struct fld_cache_entry *f_new)
+{
+	const struct lu_seq_range *range = &f_new->fce_range;
+	const seqno_t new_start  = range->lsr_start;
+	const seqno_t new_end  = range->lsr_end;
+	const mdsno_t mdt = range->lsr_index;
+
+	/* this is overlap case, these case are checking overlapping with
+	 * prev range only. fixup will handle overlaping with next range. */
+
+	if (f_curr->fce_range.lsr_index == mdt) {
+		f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
+						  new_start);
+
+		f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
+						new_end);
+
+		OBD_FREE_PTR(f_new);
+		fld_fix_new_list(cache);
+
+	} else if (new_start <= f_curr->fce_range.lsr_start &&
+			f_curr->fce_range.lsr_end <= new_end) {
+		/* case 1: new range completely overshadowed existing range.
+		 *	 e.g. whole range migrated. update fld cache entry */
+
+		f_curr->fce_range = *range;
+		OBD_FREE_PTR(f_new);
+		fld_fix_new_list(cache);
+
+	} else if (f_curr->fce_range.lsr_start < new_start &&
+			new_end < f_curr->fce_range.lsr_end) {
+		/* case 2: new range fit within existing range. */
+
+		fld_cache_punch_hole(cache, f_curr, f_new);
+
+	} else  if (new_end <= f_curr->fce_range.lsr_end) {
+		/* case 3: overlap:
+		 *	 [new_start [c_start  new_end)  c_end)
+		 */
+
+		LASSERT(new_start <= f_curr->fce_range.lsr_start);
+
+		f_curr->fce_range.lsr_start = new_end;
+		fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);
+
+	} else if (f_curr->fce_range.lsr_start <= new_start) {
+		/* case 4: overlap:
+		 *	 [c_start [new_start c_end) new_end)
+		 */
+
+		LASSERT(f_curr->fce_range.lsr_end <= new_end);
+
+		f_curr->fce_range.lsr_end = new_start;
+		fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
+	} else
+		CERROR("NEW range ="DRANGE" curr = "DRANGE"\n",
+		       PRANGE(range),PRANGE(&f_curr->fce_range));
+}
+
+struct fld_cache_entry
+*fld_cache_entry_create(const struct lu_seq_range *range)
+{
+	struct fld_cache_entry *f_new;
+
+	LASSERT(range_is_sane(range));
+
+	OBD_ALLOC_PTR(f_new);
+	if (!f_new)
+		RETURN(ERR_PTR(-ENOMEM));
+
+	f_new->fce_range = *range;
+	RETURN(f_new);
+}
+
+/**
+ * Insert FLD entry in FLD cache.
+ *
+ * This function handles all cases of merging and breaking up of
+ * ranges.
+ */
+int fld_cache_insert_nolock(struct fld_cache *cache,
+			    struct fld_cache_entry *f_new)
+{
+	struct fld_cache_entry *f_curr;
+	struct fld_cache_entry *n;
+	struct list_head *head;
+	struct list_head *prev = NULL;
+	const seqno_t new_start  = f_new->fce_range.lsr_start;
+	const seqno_t new_end  = f_new->fce_range.lsr_end;
+	__u32 new_flags  = f_new->fce_range.lsr_flags;
+	ENTRY;
+
+	/*
+	 * Duplicate entries are eliminated in insert op.
+	 * So we don't need to search new entry before starting
+	 * insertion loop.
+	 */
+
+	if (!cache->fci_no_shrink)
+		fld_cache_shrink(cache);
+
+	head = &cache->fci_entries_head;
+
+	list_for_each_entry_safe(f_curr, n, head, fce_list) {
+		/* add list if next is end of list */
+		if (new_end < f_curr->fce_range.lsr_start ||
+		   (new_end == f_curr->fce_range.lsr_start &&
+		    new_flags != f_curr->fce_range.lsr_flags))
+			break;
+
+		prev = &f_curr->fce_list;
+		/* check if this range is to left of new range. */
+		if (new_start < f_curr->fce_range.lsr_end &&
+		    new_flags == f_curr->fce_range.lsr_flags) {
+			fld_cache_overlap_handle(cache, f_curr, f_new);
+			goto out;
+		}
+	}
+
+	if (prev == NULL)
+		prev = head;
+
+	CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
+	/* Add new entry to cache and lru list. */
+	fld_cache_entry_add(cache, f_new, prev);
+out:
+	RETURN(0);
+}
+
+int fld_cache_insert(struct fld_cache *cache,
+		     const struct lu_seq_range *range)
+{
+	struct fld_cache_entry	*flde;
+	int rc;
+
+	flde = fld_cache_entry_create(range);
+	if (IS_ERR(flde))
+		RETURN(PTR_ERR(flde));
+
+	write_lock(&cache->fci_lock);
+	rc = fld_cache_insert_nolock(cache, flde);
+	write_unlock(&cache->fci_lock);
+	if (rc)
+		OBD_FREE_PTR(flde);
+
+	RETURN(rc);
+}
+
+void fld_cache_delete_nolock(struct fld_cache *cache,
+		      const struct lu_seq_range *range)
+{
+	struct fld_cache_entry *flde;
+	struct fld_cache_entry *tmp;
+	struct list_head *head;
+
+	head = &cache->fci_entries_head;
+	list_for_each_entry_safe(flde, tmp, head, fce_list) {
+		/* add list if next is end of list */
+		if (range->lsr_start == flde->fce_range.lsr_start ||
+		   (range->lsr_end == flde->fce_range.lsr_end &&
+		    range->lsr_flags == flde->fce_range.lsr_flags)) {
+			fld_cache_entry_delete(cache, flde);
+			break;
+		}
+	}
+}
+
+/**
+ * Delete FLD entry in FLD cache.
+ *
+ */
+void fld_cache_delete(struct fld_cache *cache,
+		      const struct lu_seq_range *range)
+{
+	write_lock(&cache->fci_lock);
+	fld_cache_delete_nolock(cache, range);
+	write_unlock(&cache->fci_lock);
+}
+
+struct fld_cache_entry
+*fld_cache_entry_lookup_nolock(struct fld_cache *cache,
+			      struct lu_seq_range *range)
+{
+	struct fld_cache_entry *flde;
+	struct fld_cache_entry *got = NULL;
+	struct list_head *head;
+
+	head = &cache->fci_entries_head;
+	list_for_each_entry(flde, head, fce_list) {
+		if (range->lsr_start == flde->fce_range.lsr_start ||
+		   (range->lsr_end == flde->fce_range.lsr_end &&
+		    range->lsr_flags == flde->fce_range.lsr_flags)) {
+			got = flde;
+			break;
+		}
+	}
+
+	RETURN(got);
+}
+
+/**
+ * lookup \a seq sequence for range in fld cache.
+ */
+struct fld_cache_entry
+*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range)
+{
+	struct fld_cache_entry *got = NULL;
+	ENTRY;
+
+	read_lock(&cache->fci_lock);
+	got = fld_cache_entry_lookup_nolock(cache, range);
+	read_unlock(&cache->fci_lock);
+	RETURN(got);
+}
+
+/**
+ * lookup \a seq sequence for range in fld cache.
+ */
+int fld_cache_lookup(struct fld_cache *cache,
+		     const seqno_t seq, struct lu_seq_range *range)
+{
+	struct fld_cache_entry *flde;
+	struct fld_cache_entry *prev = NULL;
+	struct list_head *head;
+	ENTRY;
+
+	read_lock(&cache->fci_lock);
+	head = &cache->fci_entries_head;
+
+	cache->fci_stat.fst_count++;
+	list_for_each_entry(flde, head, fce_list) {
+		if (flde->fce_range.lsr_start > seq) {
+			if (prev != NULL)
+				*range = prev->fce_range;
+			break;
+		}
+
+		prev = flde;
+		if (range_within(&flde->fce_range, seq)) {
+			*range = flde->fce_range;
+
+			cache->fci_stat.fst_cache++;
+			read_unlock(&cache->fci_lock);
+			RETURN(0);
+		}
+	}
+	read_unlock(&cache->fci_lock);
+	RETURN(-ENOENT);
+}
author	Peng Tao <bergwolf@gmail.com>	2013-05-02 16:46:55 +0800
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-05-14 13:54:50 -0400
commit	d7e09d0397e84eefbabfd9cb353221f3c6448d83 (patch)
tree	caeeaffa2967acfe0988dc65cf04089c0a27c7aa /drivers/staging/lustre/lustre/fld/fld_cache.c
parent	staging: vt6656: remove unused definitions (diff)
download	linux-dev-d7e09d0397e84eefbabfd9cb353221f3c6448d83.tar.xz linux-dev-d7e09d0397e84eefbabfd9cb353221f3c6448d83.zip