diff options
Diffstat (limited to 'drivers/staging/zcache/zbud.c')
-rw-r--r-- | drivers/staging/zcache/zbud.c | 1066 |
1 files changed, 0 insertions, 1066 deletions
diff --git a/drivers/staging/zcache/zbud.c b/drivers/staging/zcache/zbud.c deleted file mode 100644 index 6cda4ed9ed31..000000000000 --- a/drivers/staging/zcache/zbud.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * zbud.c - Compression buddies allocator - * - * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp. - * - * Compression buddies ("zbud") provides for efficiently packing two - * (or, possibly in the future, more) compressed pages ("zpages") into - * a single "raw" pageframe and for tracking both zpages and pageframes - * so that whole pageframes can be easily reclaimed in LRU-like order. - * It is designed to be used in conjunction with transcendent memory - * ("tmem"); for example separate LRU lists are maintained for persistent - * vs. ephemeral pages. - * - * A zbudpage is an overlay for a struct page and thus each zbudpage - * refers to a physical pageframe of RAM. When the caller passes a - * struct page from the kernel's page allocator, zbud "transforms" it - * to a zbudpage which sets/uses a different set of fields than the - * struct-page and thus must "untransform" it back by reinitializing - * certain fields before the struct-page can be freed. The fields - * of a zbudpage include a page lock for controlling access to the - * corresponding pageframe, and there is a size field for each zpage. - * Each zbudpage also lives on two linked lists: a "budlist" which is - * used to support efficient buddying of zpages; and an "lru" which - * is used for reclaiming pageframes in approximately least-recently-used - * order. - * - * A zbudpageframe is a pageframe divided up into aligned 64-byte "chunks" - * which contain the compressed data for zero, one, or two zbuds. Contained - * with the compressed data is a tmem_handle which is a key to allow - * the same data to be found via the tmem interface so the zpage can - * be invalidated (for ephemeral pages) or repatriated to the swap cache - * (for persistent pages). 
The contents of a zbudpageframe must never - * be accessed without holding the page lock for the corresponding - * zbudpage and, to accommodate highmem machines, the contents may - * only be examined or changed when kmapped. Thus, when in use, a - * kmapped zbudpageframe is referred to in the zbud code as "void *zbpg". - * - * Note that the term "zbud" refers to the combination of a zpage and - * a tmem_handle that is stored as one of possibly two "buddied" zpages; - * it also generically refers to this allocator... sorry for any confusion. - * - * A zbudref is a pointer to a struct zbudpage (which can be cast to a - * struct page), with the LSB either cleared or set to indicate, respectively, - * the first or second zpage in the zbudpageframe. Since a zbudref can be - * cast to a pointer, it is used as the tmem "pampd" pointer and uniquely - * references a stored tmem page and so is the only zbud data structure - * externally visible to zbud.c/zbud.h. - * - * Since we wish to reclaim entire pageframes but zpages may be randomly - * added and deleted to any given pageframe, we approximate LRU by - * promoting a pageframe to MRU when a zpage is added to it, but - * leaving it at the current place in the list when a zpage is deleted - * from it. As a side effect, zpages that are difficult to buddy (e.g. - * very large pages) will be reclaimed faster than average, which seems - * reasonable. - * - * In the current implementation, no more than two zpages may be stored in - * any pageframe and no zpage ever crosses a pageframe boundary. While - * other zpage allocation mechanisms may allow greater density, this two - * zpage-per-pageframe limit both ensures simple reclaim of pageframes - * (including garbage collection of references to the contents of those - * pageframes from tmem data structures) AND avoids the need for compaction. 
- * With additional complexity, zbud could be modified to support storing - * up to three zpages per pageframe or, to handle larger average zpages, - * up to three zpages per pair of pageframes, but it is not clear if the - * additional complexity would be worth it. So consider it an exercise - * for future developers. - * - * Note also that zbud does no page allocation or freeing. This is so - * that the caller has complete control over and, for accounting, visibility - * into if/when pages are allocated and freed. - * - * Finally, note that zbud limits the size of zpages it can store; the - * caller must check the zpage size with zbud_max_buddy_size before - * storing it, else BUGs will result. User beware. - */ - -#include <linux/module.h> -#include <linux/highmem.h> -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/pagemap.h> -#include <linux/atomic.h> -#include <linux/bug.h> -#include "tmem.h" -#include "zcache.h" -#include "zbud.h" - -/* - * We need to ensure that a struct zbudpage is never larger than a - * struct page. This is checked with a BUG_ON in zbud_init. - * - * The unevictable field indicates that a zbud is being added to the - * zbudpage. Since this is a two-phase process (due to tmem locking), - * this field locks the zbudpage against eviction when a zbud match - * or creation is in process. Since this addition process may occur - * in parallel for two zbuds in one zbudpage, the field is a counter - * that must not exceed two. 
- */ -struct zbudpage { - union { - struct page page; - struct { - unsigned long space_for_flags; - struct { - unsigned zbud0_size:PAGE_SHIFT; - unsigned zbud1_size:PAGE_SHIFT; - unsigned unevictable:2; - }; - struct list_head budlist; - struct list_head lru; - }; - }; -}; -#if (PAGE_SHIFT * 2) + 2 > BITS_PER_LONG -#error "zbud won't work for this arch, PAGE_SIZE is too large" -#endif - -struct zbudref { - union { - struct zbudpage *zbudpage; - unsigned long zbudref; - }; -}; - -#define CHUNK_SHIFT 6 -#define CHUNK_SIZE (1 << CHUNK_SHIFT) -#define CHUNK_MASK (~(CHUNK_SIZE-1)) -#define NCHUNKS (PAGE_SIZE >> CHUNK_SHIFT) -#define MAX_CHUNK (NCHUNKS-1) - -/* - * The following functions deal with the difference between struct - * page and struct zbudpage. Note the hack of using the pageflags - * from struct page; this is to avoid duplicating all the complex - * pageflag macros. - */ -static inline void zbudpage_spin_lock(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - while (unlikely(test_and_set_bit_lock(PG_locked, &page->flags))) { - do { - cpu_relax(); - } while (test_bit(PG_locked, &page->flags)); - } -} - -static inline void zbudpage_spin_unlock(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - clear_bit(PG_locked, &page->flags); -} - -static inline int zbudpage_spin_trylock(struct zbudpage *zbudpage) -{ - return trylock_page((struct page *)zbudpage); -} - -static inline int zbudpage_is_locked(struct zbudpage *zbudpage) -{ - return PageLocked((struct page *)zbudpage); -} - -static inline void *kmap_zbudpage_atomic(struct zbudpage *zbudpage) -{ - return kmap_atomic((struct page *)zbudpage); -} - -/* - * A dying zbudpage is an ephemeral page in the process of being evicted. 
- * Any data contained in the zbudpage is invalid and we are just waiting for - * the tmem pampds to be invalidated before freeing the page - */ -static inline int zbudpage_is_dying(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - return test_bit(PG_reclaim, &page->flags); -} - -static inline void zbudpage_set_dying(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - set_bit(PG_reclaim, &page->flags); -} - -static inline void zbudpage_clear_dying(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - clear_bit(PG_reclaim, &page->flags); -} - -/* - * A zombie zbudpage is a persistent page in the process of being evicted. - * The data contained in the zbudpage is valid and we are just waiting for - * the tmem pampds to be invalidated before freeing the page - */ -static inline int zbudpage_is_zombie(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - return test_bit(PG_dirty, &page->flags); -} - -static inline void zbudpage_set_zombie(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - set_bit(PG_dirty, &page->flags); -} - -static inline void zbudpage_clear_zombie(struct zbudpage *zbudpage) -{ - struct page *page = (struct page *)zbudpage; - - clear_bit(PG_dirty, &page->flags); -} - -static inline void kunmap_zbudpage_atomic(void *zbpg) -{ - kunmap_atomic(zbpg); -} - -/* - * zbud "translation" and helper functions - */ - -static inline struct zbudpage *zbudref_to_zbudpage(struct zbudref *zref) -{ - unsigned long zbud = (unsigned long)zref; - zbud &= ~1UL; - return (struct zbudpage *)zbud; -} - -static inline struct zbudref *zbudpage_to_zbudref(struct zbudpage *zbudpage, - unsigned budnum) -{ - unsigned long zbud = (unsigned long)zbudpage; - BUG_ON(budnum > 1); - zbud |= budnum; - return (struct zbudref *)zbud; -} - -static inline int zbudref_budnum(struct zbudref *zbudref) -{ - unsigned long zbud = (unsigned long)zbudref; - 
return zbud & 1UL; -} - -static inline unsigned zbud_max_size(void) -{ - return MAX_CHUNK << CHUNK_SHIFT; -} - -static inline unsigned zbud_size_to_chunks(unsigned size) -{ - BUG_ON(size == 0 || size > zbud_max_size()); - return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT; -} - -/* can only be used between kmap_zbudpage_atomic/kunmap_zbudpage_atomic! */ -static inline char *zbud_data(void *zbpg, - unsigned budnum, unsigned size) -{ - char *p; - - BUG_ON(size == 0 || size > zbud_max_size()); - p = (char *)zbpg; - if (budnum == 1) - p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK); - return p; -} - -/* - * These are all informative and exposed through debugfs... except for - * the arrays... anyone know how to do that? To avoid confusion for - * debugfs viewers, some of these should also be atomic_long_t, but - * I don't know how to expose atomics via debugfs either... - */ -static ssize_t zbud_eph_pageframes; -static ssize_t zbud_pers_pageframes; -static ssize_t zbud_eph_zpages; -static ssize_t zbud_pers_zpages; -static u64 zbud_eph_zbytes; -static u64 zbud_pers_zbytes; -static ssize_t zbud_eph_evicted_pageframes; -static ssize_t zbud_pers_evicted_pageframes; -static ssize_t zbud_eph_cumul_zpages; -static ssize_t zbud_pers_cumul_zpages; -static u64 zbud_eph_cumul_zbytes; -static u64 zbud_pers_cumul_zbytes; -static ssize_t zbud_eph_cumul_chunk_counts[NCHUNKS]; -static ssize_t zbud_pers_cumul_chunk_counts[NCHUNKS]; -static ssize_t zbud_eph_buddied_count; -static ssize_t zbud_pers_buddied_count; -static ssize_t zbud_eph_unbuddied_count; -static ssize_t zbud_pers_unbuddied_count; -static ssize_t zbud_eph_zombie_count; -static ssize_t zbud_pers_zombie_count; -static atomic_t zbud_eph_zombie_atomic; -static atomic_t zbud_pers_zombie_atomic; - -#ifdef CONFIG_DEBUG_FS -#include <linux/debugfs.h> -#define zdfs debugfs_create_size_t -#define zdfs64 debugfs_create_u64 -static int zbud_debugfs_init(void) -{ - struct dentry *root = debugfs_create_dir("zbud", NULL); - if (root 
== NULL) - return -ENXIO; - - /* - * would be nice to dump the sizes of the unbuddied - * arrays, like was done with sysfs, but it doesn't - * look like debugfs is flexible enough to do that - */ - zdfs64("eph_zbytes", S_IRUGO, root, &zbud_eph_zbytes); - zdfs64("eph_cumul_zbytes", S_IRUGO, root, &zbud_eph_cumul_zbytes); - zdfs64("pers_zbytes", S_IRUGO, root, &zbud_pers_zbytes); - zdfs64("pers_cumul_zbytes", S_IRUGO, root, &zbud_pers_cumul_zbytes); - zdfs("eph_cumul_zpages", S_IRUGO, root, &zbud_eph_cumul_zpages); - zdfs("eph_evicted_pageframes", S_IRUGO, root, - &zbud_eph_evicted_pageframes); - zdfs("eph_zpages", S_IRUGO, root, &zbud_eph_zpages); - zdfs("eph_pageframes", S_IRUGO, root, &zbud_eph_pageframes); - zdfs("eph_buddied_count", S_IRUGO, root, &zbud_eph_buddied_count); - zdfs("eph_unbuddied_count", S_IRUGO, root, &zbud_eph_unbuddied_count); - zdfs("pers_cumul_zpages", S_IRUGO, root, &zbud_pers_cumul_zpages); - zdfs("pers_evicted_pageframes", S_IRUGO, root, - &zbud_pers_evicted_pageframes); - zdfs("pers_zpages", S_IRUGO, root, &zbud_pers_zpages); - zdfs("pers_pageframes", S_IRUGO, root, &zbud_pers_pageframes); - zdfs("pers_buddied_count", S_IRUGO, root, &zbud_pers_buddied_count); - zdfs("pers_unbuddied_count", S_IRUGO, root, &zbud_pers_unbuddied_count); - zdfs("pers_zombie_count", S_IRUGO, root, &zbud_pers_zombie_count); - return 0; -} -#undef zdfs -#undef zdfs64 -#else -static inline int zbud_debugfs_init(void) -{ - return 0; -} -#endif - -/* protects the buddied list and all unbuddied lists */ -static DEFINE_SPINLOCK(zbud_eph_lists_lock); -static DEFINE_SPINLOCK(zbud_pers_lists_lock); - -struct zbud_unbuddied { - struct list_head list; - unsigned count; -}; - -/* list N contains pages with N chunks USED and NCHUNKS-N unused */ -/* element 0 is never used but optimizing that isn't worth it */ -static struct zbud_unbuddied zbud_eph_unbuddied[NCHUNKS]; -static struct zbud_unbuddied zbud_pers_unbuddied[NCHUNKS]; -static LIST_HEAD(zbud_eph_lru_list); -static 
LIST_HEAD(zbud_pers_lru_list); -static LIST_HEAD(zbud_eph_buddied_list); -static LIST_HEAD(zbud_pers_buddied_list); -static LIST_HEAD(zbud_eph_zombie_list); -static LIST_HEAD(zbud_pers_zombie_list); - -/* - * Given a struct page, transform it to a zbudpage so that it can be - * used by zbud and initialize fields as necessary. - */ -static inline struct zbudpage *zbud_init_zbudpage(struct page *page, bool eph) -{ - struct zbudpage *zbudpage = (struct zbudpage *)page; - - BUG_ON(page == NULL); - INIT_LIST_HEAD(&zbudpage->budlist); - INIT_LIST_HEAD(&zbudpage->lru); - zbudpage->zbud0_size = 0; - zbudpage->zbud1_size = 0; - zbudpage->unevictable = 0; - if (eph) - zbud_eph_pageframes++; - else - zbud_pers_pageframes++; - return zbudpage; -} - -/* "Transform" a zbudpage back to a struct page suitable to free. */ -static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage, - bool eph) -{ - struct page *page = (struct page *)zbudpage; - - BUG_ON(!list_empty(&zbudpage->budlist)); - BUG_ON(!list_empty(&zbudpage->lru)); - BUG_ON(zbudpage->zbud0_size != 0); - BUG_ON(zbudpage->zbud1_size != 0); - BUG_ON(!PageLocked(page)); - BUG_ON(zbudpage->unevictable != 0); - BUG_ON(zbudpage_is_dying(zbudpage)); - BUG_ON(zbudpage_is_zombie(zbudpage)); - if (eph) - zbud_eph_pageframes--; - else - zbud_pers_pageframes--; - zbudpage_spin_unlock(zbudpage); - page_mapcount_reset(page); - init_page_count(page); - page->index = 0; - return page; -} - -/* Mark a zbud as unused and do accounting */ -static inline void zbud_unuse_zbud(struct zbudpage *zbudpage, - int budnum, bool eph) -{ - unsigned size; - - BUG_ON(!zbudpage_is_locked(zbudpage)); - if (budnum == 0) { - size = zbudpage->zbud0_size; - zbudpage->zbud0_size = 0; - } else { - size = zbudpage->zbud1_size; - zbudpage->zbud1_size = 0; - } - if (eph) { - zbud_eph_zbytes -= size; - zbud_eph_zpages--; - } else { - zbud_pers_zbytes -= size; - zbud_pers_zpages--; - } -} - -/* - * Given a zbudpage/budnum/size, a tmem handle, and a 
kmapped pointer - * to some data, set up the zbud appropriately including data copying - * and accounting. Note that if cdata is NULL, the data copying is - * skipped. (This is useful for lazy writes such as for RAMster.) - */ -static void zbud_init_zbud(struct zbudpage *zbudpage, struct tmem_handle *th, - bool eph, void *cdata, - unsigned budnum, unsigned size) -{ - char *to; - void *zbpg; - struct tmem_handle *to_th; - unsigned nchunks = zbud_size_to_chunks(size); - - BUG_ON(!zbudpage_is_locked(zbudpage)); - zbpg = kmap_zbudpage_atomic(zbudpage); - to = zbud_data(zbpg, budnum, size); - to_th = (struct tmem_handle *)to; - to_th->index = th->index; - to_th->oid = th->oid; - to_th->pool_id = th->pool_id; - to_th->client_id = th->client_id; - to += sizeof(struct tmem_handle); - if (cdata != NULL) - memcpy(to, cdata, size - sizeof(struct tmem_handle)); - kunmap_zbudpage_atomic(zbpg); - if (budnum == 0) - zbudpage->zbud0_size = size; - else - zbudpage->zbud1_size = size; - if (eph) { - zbud_eph_cumul_chunk_counts[nchunks]++; - zbud_eph_zpages++; - zbud_eph_cumul_zpages++; - zbud_eph_zbytes += size; - zbud_eph_cumul_zbytes += size; - } else { - zbud_pers_cumul_chunk_counts[nchunks]++; - zbud_pers_zpages++; - zbud_pers_cumul_zpages++; - zbud_pers_zbytes += size; - zbud_pers_cumul_zbytes += size; - } -} - -/* - * Given a locked dying zbudpage, read out the tmem handles from the data, - * unlock the page, then use the handles to tell tmem to flush out its - * references - */ -static void zbud_evict_tmem(struct zbudpage *zbudpage) -{ - int i, j; - uint32_t pool_id[2], client_id[2]; - uint32_t index[2]; - struct tmem_oid oid[2]; - struct tmem_pool *pool; - void *zbpg; - struct tmem_handle *th; - unsigned size; - - /* read out the tmem handles from the data and set aside */ - zbpg = kmap_zbudpage_atomic(zbudpage); - for (i = 0, j = 0; i < 2; i++) { - size = (i == 0) ? 
zbudpage->zbud0_size : zbudpage->zbud1_size; - if (size) { - th = (struct tmem_handle *)zbud_data(zbpg, i, size); - client_id[j] = th->client_id; - pool_id[j] = th->pool_id; - oid[j] = th->oid; - index[j] = th->index; - j++; - zbud_unuse_zbud(zbudpage, i, true); - } - } - kunmap_zbudpage_atomic(zbpg); - zbudpage_spin_unlock(zbudpage); - /* zbudpage is now an unlocked dying... tell tmem to flush pointers */ - for (i = 0; i < j; i++) { - pool = zcache_get_pool_by_id(client_id[i], pool_id[i]); - if (pool != NULL) { - tmem_flush_page(pool, &oid[i], index[i]); - zcache_put_pool(pool); - } - } -} - -/* - * Externally callable zbud handling routines. - */ - -/* - * Return the maximum size compressed page that can be stored (secretly - * setting aside space for the tmem handle. - */ -unsigned int zbud_max_buddy_size(void) -{ - return zbud_max_size() - sizeof(struct tmem_handle); -} - -/* - * Given a zbud reference, free the corresponding zbud from all lists, - * mark it as unused, do accounting, and if the freeing of the zbud - * frees up an entire pageframe, return it to the caller (else NULL). - */ -struct page *zbud_free_and_delist(struct zbudref *zref, bool eph, - unsigned int *zsize, unsigned int *zpages) -{ - unsigned long budnum = zbudref_budnum(zref); - struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); - struct page *page = NULL; - unsigned chunks, bud_size, other_bud_size; - spinlock_t *lists_lock = - eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; - struct zbud_unbuddied *unbud = - eph ? zbud_eph_unbuddied : zbud_pers_unbuddied; - - - spin_lock(lists_lock); - zbudpage_spin_lock(zbudpage); - if (zbudpage_is_dying(zbudpage)) { - /* ignore dying zbudpage... 
see zbud_evict_pageframe_lru() */ - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); - *zpages = 0; - *zsize = 0; - goto out; - } - if (budnum == 0) { - bud_size = zbudpage->zbud0_size; - other_bud_size = zbudpage->zbud1_size; - } else { - bud_size = zbudpage->zbud1_size; - other_bud_size = zbudpage->zbud0_size; - } - *zsize = bud_size - sizeof(struct tmem_handle); - *zpages = 1; - zbud_unuse_zbud(zbudpage, budnum, eph); - if (other_bud_size == 0) { /* was unbuddied: unlist and free */ - chunks = zbud_size_to_chunks(bud_size) ; - if (zbudpage_is_zombie(zbudpage)) { - if (eph) - zbud_pers_zombie_count = - atomic_dec_return(&zbud_eph_zombie_atomic); - else - zbud_pers_zombie_count = - atomic_dec_return(&zbud_pers_zombie_atomic); - zbudpage_clear_zombie(zbudpage); - } else { - BUG_ON(list_empty(&unbud[chunks].list)); - list_del_init(&zbudpage->budlist); - unbud[chunks].count--; - } - list_del_init(&zbudpage->lru); - spin_unlock(lists_lock); - if (eph) - zbud_eph_unbuddied_count--; - else - zbud_pers_unbuddied_count--; - page = zbud_unuse_zbudpage(zbudpage, eph); - } else { /* was buddied: move remaining buddy to unbuddied list */ - chunks = zbud_size_to_chunks(other_bud_size) ; - if (!zbudpage_is_zombie(zbudpage)) { - list_del_init(&zbudpage->budlist); - list_add_tail(&zbudpage->budlist, &unbud[chunks].list); - unbud[chunks].count++; - } - if (eph) { - zbud_eph_buddied_count--; - zbud_eph_unbuddied_count++; - } else { - zbud_pers_unbuddied_count++; - zbud_pers_buddied_count--; - } - /* don't mess with lru, no need to move it */ - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); - } -out: - return page; -} - -/* - * Given a tmem handle, and a kmapped pointer to compressed data of - * the given size, try to find an unbuddied zbudpage in which to - * create a zbud. If found, put it there, mark the zbudpage unevictable, - * and return a zbudref to it. Else return NULL. 
- */ -struct zbudref *zbud_match_prep(struct tmem_handle *th, bool eph, - void *cdata, unsigned size) -{ - struct zbudpage *zbudpage = NULL, *zbudpage2; - unsigned long budnum = 0UL; - unsigned nchunks; - int i, found_good_buddy = 0; - spinlock_t *lists_lock = - eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; - struct zbud_unbuddied *unbud = - eph ? zbud_eph_unbuddied : zbud_pers_unbuddied; - - size += sizeof(struct tmem_handle); - nchunks = zbud_size_to_chunks(size); - for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) { - spin_lock(lists_lock); - if (!list_empty(&unbud[i].list)) { - list_for_each_entry_safe(zbudpage, zbudpage2, - &unbud[i].list, budlist) { - if (zbudpage_spin_trylock(zbudpage)) { - found_good_buddy = i; - goto found_unbuddied; - } - } - } - spin_unlock(lists_lock); - } - zbudpage = NULL; - goto out; - -found_unbuddied: - BUG_ON(!zbudpage_is_locked(zbudpage)); - BUG_ON(!((zbudpage->zbud0_size == 0) ^ (zbudpage->zbud1_size == 0))); - if (zbudpage->zbud0_size == 0) - budnum = 0UL; - else if (zbudpage->zbud1_size == 0) - budnum = 1UL; - list_del_init(&zbudpage->budlist); - if (eph) { - list_add_tail(&zbudpage->budlist, &zbud_eph_buddied_list); - unbud[found_good_buddy].count--; - zbud_eph_unbuddied_count--; - zbud_eph_buddied_count++; - /* "promote" raw zbudpage to most-recently-used */ - list_del_init(&zbudpage->lru); - list_add_tail(&zbudpage->lru, &zbud_eph_lru_list); - } else { - list_add_tail(&zbudpage->budlist, &zbud_pers_buddied_list); - unbud[found_good_buddy].count--; - zbud_pers_unbuddied_count--; - zbud_pers_buddied_count++; - /* "promote" raw zbudpage to most-recently-used */ - list_del_init(&zbudpage->lru); - list_add_tail(&zbudpage->lru, &zbud_pers_lru_list); - } - zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size); - zbudpage->unevictable++; - BUG_ON(zbudpage->unevictable == 3); - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); -out: - return zbudpage_to_zbudref(zbudpage, budnum); - -} - -/* - * Given a tmem handle, and a 
kmapped pointer to compressed data of - * the given size, and a newly allocated struct page, create an unevictable - * zbud in that new page and return a zbudref to it. - */ -struct zbudref *zbud_create_prep(struct tmem_handle *th, bool eph, - void *cdata, unsigned size, - struct page *newpage) -{ - struct zbudpage *zbudpage; - unsigned long budnum = 0; - unsigned nchunks; - spinlock_t *lists_lock = - eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; - struct zbud_unbuddied *unbud = - eph ? zbud_eph_unbuddied : zbud_pers_unbuddied; - -#if 0 - /* this may be worth it later to support decompress-in-place? */ - static unsigned long counter; - budnum = counter++ & 1; /* alternate using zbud0 and zbud1 */ -#endif - - if (size > zbud_max_buddy_size()) - return NULL; - if (newpage == NULL) - return NULL; - - size += sizeof(struct tmem_handle); - nchunks = zbud_size_to_chunks(size) ; - spin_lock(lists_lock); - zbudpage = zbud_init_zbudpage(newpage, eph); - zbudpage_spin_lock(zbudpage); - list_add_tail(&zbudpage->budlist, &unbud[nchunks].list); - if (eph) { - list_add_tail(&zbudpage->lru, &zbud_eph_lru_list); - zbud_eph_unbuddied_count++; - } else { - list_add_tail(&zbudpage->lru, &zbud_pers_lru_list); - zbud_pers_unbuddied_count++; - } - unbud[nchunks].count++; - zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size); - zbudpage->unevictable++; - BUG_ON(zbudpage->unevictable == 3); - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); - return zbudpage_to_zbudref(zbudpage, budnum); -} - -/* - * Finish creation of a zbud by, assuming another zbud isn't being created - * in parallel, marking it evictable. - */ -void zbud_create_finish(struct zbudref *zref, bool eph) -{ - struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); - spinlock_t *lists_lock = - eph ? 
&zbud_eph_lists_lock : &zbud_pers_lists_lock; - - spin_lock(lists_lock); - zbudpage_spin_lock(zbudpage); - BUG_ON(zbudpage_is_dying(zbudpage)); - zbudpage->unevictable--; - BUG_ON((int)zbudpage->unevictable < 0); - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); -} - -/* - * Given a zbudref and a struct page, decompress the data from - * the zbud into the physical page represented by the struct page - * by upcalling to zcache_decompress - */ -int zbud_decompress(struct page *data_page, struct zbudref *zref, bool eph, - void (*decompress)(char *, unsigned int, char *)) -{ - struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); - unsigned long budnum = zbudref_budnum(zref); - void *zbpg; - char *to_va, *from_va; - unsigned size; - int ret = -1; - spinlock_t *lists_lock = - eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; - - spin_lock(lists_lock); - zbudpage_spin_lock(zbudpage); - if (zbudpage_is_dying(zbudpage)) { - /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ - goto out; - } - zbpg = kmap_zbudpage_atomic(zbudpage); - to_va = kmap_atomic(data_page); - if (budnum == 0) - size = zbudpage->zbud0_size; - else - size = zbudpage->zbud1_size; - BUG_ON(size == 0 || size > zbud_max_size()); - from_va = zbud_data(zbpg, budnum, size); - from_va += sizeof(struct tmem_handle); - size -= sizeof(struct tmem_handle); - decompress(from_va, size, to_va); - kunmap_atomic(to_va); - kunmap_zbudpage_atomic(zbpg); - ret = 0; -out: - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); - return ret; -} - -/* - * Given a zbudref and a kernel pointer, copy the data from - * the zbud to the kernel pointer. - */ -int zbud_copy_from_zbud(char *to_va, struct zbudref *zref, - size_t *sizep, bool eph) -{ - struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); - unsigned long budnum = zbudref_budnum(zref); - void *zbpg; - char *from_va; - unsigned size; - int ret = -1; - spinlock_t *lists_lock = - eph ? 
&zbud_eph_lists_lock : &zbud_pers_lists_lock; - - spin_lock(lists_lock); - zbudpage_spin_lock(zbudpage); - if (zbudpage_is_dying(zbudpage)) { - /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */ - goto out; - } - zbpg = kmap_zbudpage_atomic(zbudpage); - if (budnum == 0) - size = zbudpage->zbud0_size; - else - size = zbudpage->zbud1_size; - BUG_ON(size == 0 || size > zbud_max_size()); - from_va = zbud_data(zbpg, budnum, size); - from_va += sizeof(struct tmem_handle); - size -= sizeof(struct tmem_handle); - *sizep = size; - memcpy(to_va, from_va, size); - - kunmap_zbudpage_atomic(zbpg); - ret = 0; -out: - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); - return ret; -} - -/* - * Given a zbudref and a kernel pointer, copy the data from - * the kernel pointer to the zbud. - */ -int zbud_copy_to_zbud(struct zbudref *zref, char *from_va, bool eph) -{ - struct zbudpage *zbudpage = zbudref_to_zbudpage(zref); - unsigned long budnum = zbudref_budnum(zref); - void *zbpg; - char *to_va; - unsigned size; - int ret = -1; - spinlock_t *lists_lock = - eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; - - spin_lock(lists_lock); - zbudpage_spin_lock(zbudpage); - if (zbudpage_is_dying(zbudpage)) { - /* ignore dying zbudpage... 
see zbud_evict_pageframe_lru() */ - goto out; - } - zbpg = kmap_zbudpage_atomic(zbudpage); - if (budnum == 0) - size = zbudpage->zbud0_size; - else - size = zbudpage->zbud1_size; - BUG_ON(size == 0 || size > zbud_max_size()); - to_va = zbud_data(zbpg, budnum, size); - to_va += sizeof(struct tmem_handle); - size -= sizeof(struct tmem_handle); - memcpy(to_va, from_va, size); - - kunmap_zbudpage_atomic(zbpg); - ret = 0; -out: - zbudpage_spin_unlock(zbudpage); - spin_unlock(lists_lock); - return ret; -} - -/* - * Choose an ephemeral LRU zbudpage that is evictable (not locked), ensure - * there are no references to it remaining, and return the now unused - * (and re-init'ed) struct page and the total amount of compressed - * data that was evicted. - */ -struct page *zbud_evict_pageframe_lru(unsigned int *zsize, unsigned int *zpages) -{ - struct zbudpage *zbudpage = NULL, *zbudpage2; - struct zbud_unbuddied *unbud = zbud_eph_unbuddied; - struct page *page = NULL; - bool irqs_disabled = irqs_disabled(); - - /* - * Since this can be called indirectly from cleancache_put, which - * has interrupts disabled, as well as frontswap_put, which does not, - * we need to be able to handle both cases, even though it is ugly. 
- */ - if (irqs_disabled) - spin_lock(&zbud_eph_lists_lock); - else - spin_lock_bh(&zbud_eph_lists_lock); - *zsize = 0; - if (list_empty(&zbud_eph_lru_list)) - goto unlock_out; - list_for_each_entry_safe(zbudpage, zbudpage2, &zbud_eph_lru_list, lru) { - /* skip a locked zbudpage */ - if (unlikely(!zbudpage_spin_trylock(zbudpage))) - continue; - /* skip an unevictable zbudpage */ - if (unlikely(zbudpage->unevictable != 0)) { - zbudpage_spin_unlock(zbudpage); - continue; - } - /* got a locked evictable page */ - goto evict_page; - - } -unlock_out: - /* no unlocked evictable pages, give up */ - if (irqs_disabled) - spin_unlock(&zbud_eph_lists_lock); - else - spin_unlock_bh(&zbud_eph_lists_lock); - goto out; - -evict_page: - list_del_init(&zbudpage->budlist); - list_del_init(&zbudpage->lru); - zbudpage_set_dying(zbudpage); - /* - * the zbudpage is now "dying" and attempts to read, write, - * or delete data from it will be ignored - */ - if (zbudpage->zbud0_size != 0 && zbudpage->zbud1_size != 0) { - *zsize = zbudpage->zbud0_size + zbudpage->zbud1_size - - (2 * sizeof(struct tmem_handle)); - *zpages = 2; - } else if (zbudpage->zbud0_size != 0) { - unbud[zbud_size_to_chunks(zbudpage->zbud0_size)].count--; - *zsize = zbudpage->zbud0_size - sizeof(struct tmem_handle); - *zpages = 1; - } else if (zbudpage->zbud1_size != 0) { - unbud[zbud_size_to_chunks(zbudpage->zbud1_size)].count--; - *zsize = zbudpage->zbud1_size - sizeof(struct tmem_handle); - *zpages = 1; - } else { - BUG(); - } - spin_unlock(&zbud_eph_lists_lock); - zbud_eph_evicted_pageframes++; - if (*zpages == 1) - zbud_eph_unbuddied_count--; - else - zbud_eph_buddied_count--; - zbud_evict_tmem(zbudpage); - zbudpage_spin_lock(zbudpage); - zbudpage_clear_dying(zbudpage); - page = zbud_unuse_zbudpage(zbudpage, true); - if (!irqs_disabled) - local_bh_enable(); -out: - return page; -} - -/* - * Choose a persistent LRU zbudpage that is evictable (not locked), zombify it, - * read the tmem_handle(s) out of it into the 
passed array, and return the - * number of zbuds. Caller must perform necessary tmem functions and, - * indirectly, zbud functions to fetch any valid data and cause the - * now-zombified zbudpage to eventually be freed. We track the zombified - * zbudpage count so it is possible to observe if there is a leak. - FIXME: describe (ramster) case where data pointers are passed in for memcpy - */ -unsigned int zbud_make_zombie_lru(struct tmem_handle *th, unsigned char **data, - unsigned int *zsize, bool eph) -{ - struct zbudpage *zbudpage = NULL, *zbudpag2; - struct tmem_handle *thfrom; - char *from_va; - void *zbpg; - unsigned size; - int ret = 0, i; - spinlock_t *lists_lock = - eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock; - struct list_head *lru_list = - eph ? &zbud_eph_lru_list : &zbud_pers_lru_list; - - spin_lock_bh(lists_lock); - if (list_empty(lru_list)) - goto out; - list_for_each_entry_safe(zbudpage, zbudpag2, lru_list, lru) { - /* skip a locked zbudpage */ - if (unlikely(!zbudpage_spin_trylock(zbudpage))) - continue; - /* skip an unevictable zbudpage */ - if (unlikely(zbudpage->unevictable != 0)) { - zbudpage_spin_unlock(zbudpage); - continue; - } - /* got a locked evictable page */ - goto zombify_page; - } - /* no unlocked evictable pages, give up */ - goto out; - -zombify_page: - /* got an unlocked evictable page, zombify it */ - list_del_init(&zbudpage->budlist); - zbudpage_set_zombie(zbudpage); - /* FIXME what accounting do I need to do here? */ - list_del_init(&zbudpage->lru); - if (eph) { - list_add_tail(&zbudpage->lru, &zbud_eph_zombie_list); - zbud_eph_zombie_count = - atomic_inc_return(&zbud_eph_zombie_atomic); - } else { - list_add_tail(&zbudpage->lru, &zbud_pers_zombie_list); - zbud_pers_zombie_count = - atomic_inc_return(&zbud_pers_zombie_atomic); - } - /* FIXME what accounting do I need to do here? */ - zbpg = kmap_zbudpage_atomic(zbudpage); - for (i = 0; i < 2; i++) { - size = (i == 0) ? 
zbudpage->zbud0_size : zbudpage->zbud1_size; - if (size) { - from_va = zbud_data(zbpg, i, size); - thfrom = (struct tmem_handle *)from_va; - from_va += sizeof(struct tmem_handle); - size -= sizeof(struct tmem_handle); - if (th != NULL) - th[ret] = *thfrom; - if (data != NULL) - memcpy(data[ret], from_va, size); - if (zsize != NULL) - *zsize++ = size; - ret++; - } - } - kunmap_zbudpage_atomic(zbpg); - zbudpage_spin_unlock(zbudpage); -out: - spin_unlock_bh(lists_lock); - return ret; -} - -void zbud_init(void) -{ - int i; - - zbud_debugfs_init(); - BUG_ON((sizeof(struct tmem_handle) * 2 > CHUNK_SIZE)); - BUG_ON(sizeof(struct zbudpage) > sizeof(struct page)); - for (i = 0; i < NCHUNKS; i++) { - INIT_LIST_HEAD(&zbud_eph_unbuddied[i].list); - INIT_LIST_HEAD(&zbud_pers_unbuddied[i].list); - } -} |