diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/fld/fld_cache.c')
-rw-r--r-- | drivers/staging/lustre/lustre/fld/fld_cache.c | 566 |
1 files changed, 566 insertions, 0 deletions
diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c new file mode 100644 index 000000000000..347f2ae83bc8 --- /dev/null +++ b/drivers/staging/lustre/lustre/fld/fld_cache.c @@ -0,0 +1,566 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/fld/fld_cache.c + * + * FLD (Fids Location Database) + * + * Author: Pravin Shelar <pravin.shelar@sun.com> + * Author: Yury Umanets <umka@clusterfs.com> + */ + +#define DEBUG_SUBSYSTEM S_FLD + +# include <linux/libcfs/libcfs.h> +# include <linux/module.h> +# include <linux/jbd.h> +# include <asm/div64.h> + +#include <obd.h> +#include <obd_class.h> +#include <lustre_ver.h> +#include <obd_support.h> +#include <lprocfs_status.h> + +#include <dt_object.h> +#include <md_object.h> +#include <lustre_req_layout.h> +#include <lustre_fld.h> +#include "fld_internal.h" + +/** + * create fld cache. + */ +struct fld_cache *fld_cache_init(const char *name, + int cache_size, int cache_threshold) +{ + struct fld_cache *cache; + ENTRY; + + LASSERT(name != NULL); + LASSERT(cache_threshold < cache_size); + + OBD_ALLOC_PTR(cache); + if (cache == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + INIT_LIST_HEAD(&cache->fci_entries_head); + INIT_LIST_HEAD(&cache->fci_lru); + + cache->fci_cache_count = 0; + rwlock_init(&cache->fci_lock); + + strlcpy(cache->fci_name, name, + sizeof(cache->fci_name)); + + cache->fci_cache_size = cache_size; + cache->fci_threshold = cache_threshold; + + /* Init fld cache info. */ + memset(&cache->fci_stat, 0, sizeof(cache->fci_stat)); + + CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n", + cache->fci_name, cache_size, cache_threshold); + + RETURN(cache); +} + +/** + * destroy fld cache. + */ +void fld_cache_fini(struct fld_cache *cache) +{ + __u64 pct; + ENTRY; + + LASSERT(cache != NULL); + fld_cache_flush(cache); + + if (cache->fci_stat.fst_count > 0) { + pct = cache->fci_stat.fst_cache * 100; + do_div(pct, cache->fci_stat.fst_count); + } else { + pct = 0; + } + + CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name); + CDEBUG(D_INFO, " Total reqs: "LPU64"\n", cache->fci_stat.fst_count); + CDEBUG(D_INFO, " Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache); + CDEBUG(D_INFO, " Cache hits: "LPU64"%%\n", pct); + + OBD_FREE_PTR(cache); + + EXIT; +} + +/** + * delete given node from list. + */ +void fld_cache_entry_delete(struct fld_cache *cache, + struct fld_cache_entry *node) +{ + list_del(&node->fce_list); + list_del(&node->fce_lru); + cache->fci_cache_count--; + OBD_FREE_PTR(node); +} + +/** + * fix list by checking new entry with NEXT entry in order. + */ +static void fld_fix_new_list(struct fld_cache *cache) +{ + struct fld_cache_entry *f_curr; + struct fld_cache_entry *f_next; + struct lu_seq_range *c_range; + struct lu_seq_range *n_range; + struct list_head *head = &cache->fci_entries_head; + ENTRY; + +restart_fixup: + + list_for_each_entry_safe(f_curr, f_next, head, fce_list) { + c_range = &f_curr->fce_range; + n_range = &f_next->fce_range; + + LASSERT(range_is_sane(c_range)); + if (&f_next->fce_list == head) + break; + + if (c_range->lsr_flags != n_range->lsr_flags) + continue; + + LASSERTF(c_range->lsr_start <= n_range->lsr_start, + "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n", + PRANGE(c_range), PRANGE(n_range)); + + /* check merge possibility with next range */ + if (c_range->lsr_end == n_range->lsr_start) { + if (c_range->lsr_index != n_range->lsr_index) + continue; + n_range->lsr_start = c_range->lsr_start; + fld_cache_entry_delete(cache, f_curr); + continue; + } + + /* check if current range overlaps with next range. */ + if (n_range->lsr_start < c_range->lsr_end) { + if (c_range->lsr_index == n_range->lsr_index) { + n_range->lsr_start = c_range->lsr_start; + n_range->lsr_end = max(c_range->lsr_end, + n_range->lsr_end); + fld_cache_entry_delete(cache, f_curr); + } else { + if (n_range->lsr_end <= c_range->lsr_end) { + *n_range = *c_range; + fld_cache_entry_delete(cache, f_curr); + } else + n_range->lsr_start = c_range->lsr_end; + } + + /* we could have overlap over next + * range too. better restart. */ + goto restart_fixup; + } + + /* kill duplicates */ + if (c_range->lsr_start == n_range->lsr_start && + c_range->lsr_end == n_range->lsr_end) + fld_cache_entry_delete(cache, f_curr); + } + + EXIT; +} + +/** + * add node to fld cache + */ +static inline void fld_cache_entry_add(struct fld_cache *cache, + struct fld_cache_entry *f_new, + struct list_head *pos) +{ + list_add(&f_new->fce_list, pos); + list_add(&f_new->fce_lru, &cache->fci_lru); + + cache->fci_cache_count++; + fld_fix_new_list(cache); +} + +/** + * Check if cache needs to be shrunk. If so - do it. + * Remove one entry in list and so on until cache is shrunk enough. + */ +static int fld_cache_shrink(struct fld_cache *cache) +{ + struct fld_cache_entry *flde; + struct list_head *curr; + int num = 0; + ENTRY; + + LASSERT(cache != NULL); + + if (cache->fci_cache_count < cache->fci_cache_size) + RETURN(0); + + curr = cache->fci_lru.prev; + + while (cache->fci_cache_count + cache->fci_threshold > + cache->fci_cache_size && curr != &cache->fci_lru) { + + flde = list_entry(curr, struct fld_cache_entry, fce_lru); + curr = curr->prev; + fld_cache_entry_delete(cache, flde); + num++; + } + + CDEBUG(D_INFO, "%s: FLD cache - Shrunk by " + "%d entries\n", cache->fci_name, num); + + RETURN(0); +} + +/** + * kill all fld cache entries. + */ +void fld_cache_flush(struct fld_cache *cache) +{ + ENTRY; + + write_lock(&cache->fci_lock); + cache->fci_cache_size = 0; + fld_cache_shrink(cache); + write_unlock(&cache->fci_lock); + + EXIT; +} + +/** + * punch hole in existing range. divide this range and add new + * entry accordingly. + */ + +void fld_cache_punch_hole(struct fld_cache *cache, + struct fld_cache_entry *f_curr, + struct fld_cache_entry *f_new) +{ + const struct lu_seq_range *range = &f_new->fce_range; + const seqno_t new_start = range->lsr_start; + const seqno_t new_end = range->lsr_end; + struct fld_cache_entry *fldt; + + ENTRY; + OBD_ALLOC_GFP(fldt, sizeof *fldt, GFP_ATOMIC); + if (!fldt) { + OBD_FREE_PTR(f_new); + EXIT; + /* overlap is not allowed, so dont mess up list. */ + return; + } + /* break f_curr RANGE into three RANGES: + * f_curr, f_new , fldt + */ + + /* f_new = *range */ + + /* fldt */ + fldt->fce_range.lsr_start = new_end; + fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end; + fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index; + + /* f_curr */ + f_curr->fce_range.lsr_end = new_start; + + /* add these two entries to list */ + fld_cache_entry_add(cache, f_new, &f_curr->fce_list); + fld_cache_entry_add(cache, fldt, &f_new->fce_list); + + /* no need to fixup */ + EXIT; +} + +/** + * handle range overlap in fld cache. + */ +static void fld_cache_overlap_handle(struct fld_cache *cache, + struct fld_cache_entry *f_curr, + struct fld_cache_entry *f_new) +{ + const struct lu_seq_range *range = &f_new->fce_range; + const seqno_t new_start = range->lsr_start; + const seqno_t new_end = range->lsr_end; + const mdsno_t mdt = range->lsr_index; + + /* this is overlap case, these case are checking overlapping with + * prev range only. fixup will handle overlaping with next range. */ + + if (f_curr->fce_range.lsr_index == mdt) { + f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start, + new_start); + + f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end, + new_end); + + OBD_FREE_PTR(f_new); + fld_fix_new_list(cache); + + } else if (new_start <= f_curr->fce_range.lsr_start && + f_curr->fce_range.lsr_end <= new_end) { + /* case 1: new range completely overshadowed existing range. + * e.g. whole range migrated. update fld cache entry */ + + f_curr->fce_range = *range; + OBD_FREE_PTR(f_new); + fld_fix_new_list(cache); + + } else if (f_curr->fce_range.lsr_start < new_start && + new_end < f_curr->fce_range.lsr_end) { + /* case 2: new range fit within existing range. */ + + fld_cache_punch_hole(cache, f_curr, f_new); + + } else if (new_end <= f_curr->fce_range.lsr_end) { + /* case 3: overlap: + * [new_start [c_start new_end) c_end) + */ + + LASSERT(new_start <= f_curr->fce_range.lsr_start); + + f_curr->fce_range.lsr_start = new_end; + fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev); + + } else if (f_curr->fce_range.lsr_start <= new_start) { + /* case 4: overlap: + * [c_start [new_start c_end) new_end) + */ + + LASSERT(f_curr->fce_range.lsr_end <= new_end); + + f_curr->fce_range.lsr_end = new_start; + fld_cache_entry_add(cache, f_new, &f_curr->fce_list); + } else + CERROR("NEW range ="DRANGE" curr = "DRANGE"\n", + PRANGE(range),PRANGE(&f_curr->fce_range)); +} + +struct fld_cache_entry +*fld_cache_entry_create(const struct lu_seq_range *range) +{ + struct fld_cache_entry *f_new; + + LASSERT(range_is_sane(range)); + + OBD_ALLOC_PTR(f_new); + if (!f_new) + RETURN(ERR_PTR(-ENOMEM)); + + f_new->fce_range = *range; + RETURN(f_new); +} + +/** + * Insert FLD entry in FLD cache. + * + * This function handles all cases of merging and breaking up of + * ranges. + */ +int fld_cache_insert_nolock(struct fld_cache *cache, + struct fld_cache_entry *f_new) +{ + struct fld_cache_entry *f_curr; + struct fld_cache_entry *n; + struct list_head *head; + struct list_head *prev = NULL; + const seqno_t new_start = f_new->fce_range.lsr_start; + const seqno_t new_end = f_new->fce_range.lsr_end; + __u32 new_flags = f_new->fce_range.lsr_flags; + ENTRY; + + /* + * Duplicate entries are eliminated in insert op. + * So we don't need to search new entry before starting + * insertion loop. + */ + + if (!cache->fci_no_shrink) + fld_cache_shrink(cache); + + head = &cache->fci_entries_head; + + list_for_each_entry_safe(f_curr, n, head, fce_list) { + /* add list if next is end of list */ + if (new_end < f_curr->fce_range.lsr_start || + (new_end == f_curr->fce_range.lsr_start && + new_flags != f_curr->fce_range.lsr_flags)) + break; + + prev = &f_curr->fce_list; + /* check if this range is to left of new range. */ + if (new_start < f_curr->fce_range.lsr_end && + new_flags == f_curr->fce_range.lsr_flags) { + fld_cache_overlap_handle(cache, f_curr, f_new); + goto out; + } + } + + if (prev == NULL) + prev = head; + + CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range)); + /* Add new entry to cache and lru list. */ + fld_cache_entry_add(cache, f_new, prev); +out: + RETURN(0); +} + +int fld_cache_insert(struct fld_cache *cache, + const struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + int rc; + + flde = fld_cache_entry_create(range); + if (IS_ERR(flde)) + RETURN(PTR_ERR(flde)); + + write_lock(&cache->fci_lock); + rc = fld_cache_insert_nolock(cache, flde); + write_unlock(&cache->fci_lock); + if (rc) + OBD_FREE_PTR(flde); + + RETURN(rc); +} + +void fld_cache_delete_nolock(struct fld_cache *cache, + const struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + struct fld_cache_entry *tmp; + struct list_head *head; + + head = &cache->fci_entries_head; + list_for_each_entry_safe(flde, tmp, head, fce_list) { + /* add list if next is end of list */ + if (range->lsr_start == flde->fce_range.lsr_start || + (range->lsr_end == flde->fce_range.lsr_end && + range->lsr_flags == flde->fce_range.lsr_flags)) { + fld_cache_entry_delete(cache, flde); + break; + } + } +} + +/** + * Delete FLD entry in FLD cache. + * + */ +void fld_cache_delete(struct fld_cache *cache, + const struct lu_seq_range *range) +{ + write_lock(&cache->fci_lock); + fld_cache_delete_nolock(cache, range); + write_unlock(&cache->fci_lock); +} + +struct fld_cache_entry +*fld_cache_entry_lookup_nolock(struct fld_cache *cache, + struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + struct fld_cache_entry *got = NULL; + struct list_head *head; + + head = &cache->fci_entries_head; + list_for_each_entry(flde, head, fce_list) { + if (range->lsr_start == flde->fce_range.lsr_start || + (range->lsr_end == flde->fce_range.lsr_end && + range->lsr_flags == flde->fce_range.lsr_flags)) { + got = flde; + break; + } + } + + RETURN(got); +} + +/** + * lookup \a seq sequence for range in fld cache. + */ +struct fld_cache_entry +*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range) +{ + struct fld_cache_entry *got = NULL; + ENTRY; + + read_lock(&cache->fci_lock); + got = fld_cache_entry_lookup_nolock(cache, range); + read_unlock(&cache->fci_lock); + RETURN(got); +} + +/** + * lookup \a seq sequence for range in fld cache. + */ +int fld_cache_lookup(struct fld_cache *cache, + const seqno_t seq, struct lu_seq_range *range) +{ + struct fld_cache_entry *flde; + struct fld_cache_entry *prev = NULL; + struct list_head *head; + ENTRY; + + read_lock(&cache->fci_lock); + head = &cache->fci_entries_head; + + cache->fci_stat.fst_count++; + list_for_each_entry(flde, head, fce_list) { + if (flde->fce_range.lsr_start > seq) { + if (prev != NULL) + *range = prev->fce_range; + break; + } + + prev = flde; + if (range_within(&flde->fce_range, seq)) { + *range = flde->fce_range; + + cache->fci_stat.fst_cache++; + read_unlock(&cache->fci_lock); + RETURN(0); + } + } + read_unlock(&cache->fci_lock); + RETURN(-ENOENT); +} |