Diffstat (limited to 'drivers/staging/zcache/zbud.c')
-rw-r--r--  drivers/staging/zcache/zbud.c  1066
1 file changed, 0 insertions, 1066 deletions
diff --git a/drivers/staging/zcache/zbud.c b/drivers/staging/zcache/zbud.c
deleted file mode 100644
index 6cda4ed9ed31..000000000000
--- a/drivers/staging/zcache/zbud.c
+++ /dev/null
@@ -1,1066 +0,0 @@
-/*
- * zbud.c - Compression buddies allocator
- *
- * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
- *
- * Compression buddies ("zbud") provides for efficiently packing two
- * (or, possibly in the future, more) compressed pages ("zpages") into
- * a single "raw" pageframe and for tracking both zpages and pageframes
- * so that whole pageframes can be easily reclaimed in LRU-like order.
- * It is designed to be used in conjunction with transcendent memory
- * ("tmem"); for example separate LRU lists are maintained for persistent
- * vs. ephemeral pages.
- *
- * A zbudpage is an overlay for a struct page and thus each zbudpage
- * refers to a physical pageframe of RAM. When the caller passes a
- * struct page from the kernel's page allocator, zbud "transforms" it
- * to a zbudpage, which sets/uses a different set of fields than the
- * struct-page; zbud must later "untransform" it by reinitializing
- * certain fields before the struct-page can be freed. The fields
- * of a zbudpage include a page lock for controlling access to the
- * corresponding pageframe, and there is a size field for each zpage.
- * Each zbudpage also lives on two linked lists: a "budlist" which is
- * used to support efficient buddying of zpages; and an "lru" which
- * is used for reclaiming pageframes in approximately least-recently-used
- * order.
- *
- * A zbudpageframe is a pageframe divided up into aligned 64-byte "chunks"
- * which contain the compressed data for zero, one, or two zbuds. Stored
- * with the compressed data is a tmem_handle, a key which allows
- * the same data to be found via the tmem interface so the zpage can
- * be invalidated (for ephemeral pages) or repatriated to the swap cache
- * (for persistent pages). The contents of a zbudpageframe must never
- * be accessed without holding the page lock for the corresponding
- * zbudpage and, to accommodate highmem machines, the contents may
- * only be examined or changed when kmapped. Thus, when in use, a
- * kmapped zbudpageframe is referred to in the zbud code as "void *zbpg".
- *
- * Note that the term "zbud" refers to the combination of a zpage and
- * a tmem_handle that is stored as one of possibly two "buddied" zpages;
- * it also generically refers to this allocator... sorry for any confusion.
- *
- * A zbudref is a pointer to a struct zbudpage (which can be cast to a
- * struct page), with the LSB either cleared or set to indicate, respectively,
- * the first or second zpage in the zbudpageframe. Since a zbudref can be
- * cast to a pointer, it is used as the tmem "pampd" pointer and uniquely
- * references a stored tmem page and so is the only zbud data structure
- * externally visible to zbud.c/zbud.h.
- *
- * Since we wish to reclaim entire pageframes but zpages may be randomly
- * added to and deleted from any given pageframe, we approximate LRU by
- * promoting a pageframe to MRU when a zpage is added to it, but
- * leaving it at its current place in the list when a zpage is deleted
- * from it. As a side effect, zpages that are difficult to buddy (e.g.
- * very large pages) will be reclaimed faster than average, which seems
- * reasonable.
- *
- * In the current implementation, no more than two zpages may be stored in
- * any pageframe and no zpage ever crosses a pageframe boundary. While
- * other zpage allocation mechanisms may allow greater density, this two
- * zpage-per-pageframe limit both ensures simple reclaim of pageframes
- * (including garbage collection of references to the contents of those
- * pageframes from tmem data structures) AND avoids the need for compaction.
- * With additional complexity, zbud could be modified to support storing
- * up to three zpages per pageframe or, to handle larger average zpages,
- * up to three zpages per pair of pageframes, but it is not clear if the
- * additional complexity would be worth it. So consider it an exercise
- * for future developers.
- *
- * Note also that zbud does no page allocation or freeing. This is so
- * that the caller has complete control over, and accounting visibility
- * into, if and when pages are allocated and freed.
- *
- * Finally, note that zbud limits the size of zpages it can store; the
- * caller must check the zpage size with zbud_max_buddy_size before
- * storing it, else BUGs will result. User beware.
- */
-
-#include <linux/module.h>
-#include <linux/highmem.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/pagemap.h>
-#include <linux/atomic.h>
-#include <linux/bug.h>
-#include "tmem.h"
-#include "zcache.h"
-#include "zbud.h"
-
-/*
- * We need to ensure that a struct zbudpage is never larger than a
- * struct page. This is checked with a BUG_ON in zbud_init.
- *
- * The unevictable field indicates that a zbud is being added to the
- * zbudpage. Since this is a two-phase process (due to tmem locking),
- * this field locks the zbudpage against eviction when a zbud match
- * or creation is in process. Since this addition process may occur
- * in parallel for two zbuds in one zbudpage, the field is a counter
- * that must not exceed two.
- */
-struct zbudpage {
- union {
- struct page page;
- struct {
- unsigned long space_for_flags;
- struct {
- unsigned zbud0_size:PAGE_SHIFT;
- unsigned zbud1_size:PAGE_SHIFT;
- unsigned unevictable:2;
- };
- struct list_head budlist;
- struct list_head lru;
- };
- };
-};
-#if (PAGE_SHIFT * 2) + 2 > BITS_PER_LONG
-#error "zbud won't work for this arch, PAGE_SIZE is too large"
-#endif
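/*
 * Illustrative arithmetic, not part of the original file: with the
 * common PAGE_SHIFT of 12, the anonymous bitfield above needs
 * 12 + 12 + 2 = 26 bits, so it shares one unsigned long with room to
 * spare on both 32- and 64-bit machines; the #error above rejects any
 * configuration where 2 * PAGE_SHIFT + 2 exceeds BITS_PER_LONG.
 */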
-
-struct zbudref {
- union {
- struct zbudpage *zbudpage;
- unsigned long zbudref;
- };
-};
-
-#define CHUNK_SHIFT 6
-#define CHUNK_SIZE (1 << CHUNK_SHIFT)
-#define CHUNK_MASK (~(CHUNK_SIZE-1))
-#define NCHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
-#define MAX_CHUNK (NCHUNKS-1)
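/*
 * Illustrative arithmetic, not part of the original file: with 4K
 * pages, CHUNK_SIZE is 64, NCHUNKS is 4096 >> 6 = 64 and MAX_CHUNK
 * is 63, so zbud_max_size() below works out to 63 * 64 = 4032 bytes;
 * a zbud, including its tmem_handle, may never occupy every chunk of
 * a pageframe.
 */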
-
-/*
- * The following functions deal with the difference between struct
- * page and struct zbudpage. Note the hack of using the pageflags
- * from struct page; this is to avoid duplicating all the complex
- * pageflag macros.
- */
-static inline void zbudpage_spin_lock(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- while (unlikely(test_and_set_bit_lock(PG_locked, &page->flags))) {
- do {
- cpu_relax();
- } while (test_bit(PG_locked, &page->flags));
- }
-}
-
-static inline void zbudpage_spin_unlock(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- clear_bit(PG_locked, &page->flags);
-}
-
-static inline int zbudpage_spin_trylock(struct zbudpage *zbudpage)
-{
- return trylock_page((struct page *)zbudpage);
-}
-
-static inline int zbudpage_is_locked(struct zbudpage *zbudpage)
-{
- return PageLocked((struct page *)zbudpage);
-}
-
-static inline void *kmap_zbudpage_atomic(struct zbudpage *zbudpage)
-{
- return kmap_atomic((struct page *)zbudpage);
-}
-
-/*
- * A dying zbudpage is an ephemeral page in the process of being evicted.
- * Any data contained in the zbudpage is invalid and we are just waiting for
- * the tmem pampds to be invalidated before freeing the page.
- */
-static inline int zbudpage_is_dying(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- return test_bit(PG_reclaim, &page->flags);
-}
-
-static inline void zbudpage_set_dying(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- set_bit(PG_reclaim, &page->flags);
-}
-
-static inline void zbudpage_clear_dying(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- clear_bit(PG_reclaim, &page->flags);
-}
-
-/*
- * A zombie zbudpage is a persistent page in the process of being evicted.
- * The data contained in the zbudpage is valid and we are just waiting for
- * the tmem pampds to be invalidated before freeing the page.
- */
-static inline int zbudpage_is_zombie(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- return test_bit(PG_dirty, &page->flags);
-}
-
-static inline void zbudpage_set_zombie(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- set_bit(PG_dirty, &page->flags);
-}
-
-static inline void zbudpage_clear_zombie(struct zbudpage *zbudpage)
-{
- struct page *page = (struct page *)zbudpage;
-
- clear_bit(PG_dirty, &page->flags);
-}
-
-static inline void kunmap_zbudpage_atomic(void *zbpg)
-{
- kunmap_atomic(zbpg);
-}
-
-/*
- * zbud "translation" and helper functions
- */
-
-static inline struct zbudpage *zbudref_to_zbudpage(struct zbudref *zref)
-{
- unsigned long zbud = (unsigned long)zref;
- zbud &= ~1UL;
- return (struct zbudpage *)zbud;
-}
-
-static inline struct zbudref *zbudpage_to_zbudref(struct zbudpage *zbudpage,
- unsigned budnum)
-{
- unsigned long zbud = (unsigned long)zbudpage;
- BUG_ON(budnum > 1);
- zbud |= budnum;
- return (struct zbudref *)zbud;
-}
-
-static inline int zbudref_budnum(struct zbudref *zbudref)
-{
- unsigned long zbud = (unsigned long)zbudref;
- return zbud & 1UL;
-}
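/*
 * Illustrative round trip, not part of the original file: the three
 * helpers above implement the tagged-pointer scheme described in the
 * header comment. Because a struct page is always more than 2-byte
 * aligned, bit 0 is free to carry the bud number, and for budnum 0
 * or 1 the round trip holds:
 *
 *	struct zbudref *zref = zbudpage_to_zbudref(zbudpage, budnum);
 *	BUG_ON(zbudref_to_zbudpage(zref) != zbudpage);
 *	BUG_ON(zbudref_budnum(zref) != budnum);
 */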
-
-static inline unsigned zbud_max_size(void)
-{
- return MAX_CHUNK << CHUNK_SHIFT;
-}
-
-static inline unsigned zbud_size_to_chunks(unsigned size)
-{
- BUG_ON(size == 0 || size > zbud_max_size());
- return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
-}
-
-/* can only be used between kmap_zbudpage_atomic/kunmap_zbudpage_atomic! */
-static inline char *zbud_data(void *zbpg,
- unsigned budnum, unsigned size)
-{
- char *p;
-
- BUG_ON(size == 0 || size > zbud_max_size());
- p = (char *)zbpg;
- if (budnum == 1)
- p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK);
- return p;
-}
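/*
 * Illustrative layout, not part of the original file: bud 0 always
 * starts at offset 0 of the kmapped pageframe while bud 1 is
 * right-aligned to the end, so the two grow toward each other. For
 * example, with size = 400, (400 + 63) & CHUNK_MASK rounds up to 448,
 * and on a 4K page bud 1's data would begin at 4096 - 448 = 3648.
 */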
-
-/*
- * These counters are all informative and are exposed through debugfs,
- * except for the arrays, which debugfs does not appear flexible enough
- * to expose. To avoid confusion for debugfs viewers, some of these
- * should also be atomic_long_t, but there is no obvious way to expose
- * atomics via debugfs either.
- */
-static ssize_t zbud_eph_pageframes;
-static ssize_t zbud_pers_pageframes;
-static ssize_t zbud_eph_zpages;
-static ssize_t zbud_pers_zpages;
-static u64 zbud_eph_zbytes;
-static u64 zbud_pers_zbytes;
-static ssize_t zbud_eph_evicted_pageframes;
-static ssize_t zbud_pers_evicted_pageframes;
-static ssize_t zbud_eph_cumul_zpages;
-static ssize_t zbud_pers_cumul_zpages;
-static u64 zbud_eph_cumul_zbytes;
-static u64 zbud_pers_cumul_zbytes;
-static ssize_t zbud_eph_cumul_chunk_counts[NCHUNKS];
-static ssize_t zbud_pers_cumul_chunk_counts[NCHUNKS];
-static ssize_t zbud_eph_buddied_count;
-static ssize_t zbud_pers_buddied_count;
-static ssize_t zbud_eph_unbuddied_count;
-static ssize_t zbud_pers_unbuddied_count;
-static ssize_t zbud_eph_zombie_count;
-static ssize_t zbud_pers_zombie_count;
-static atomic_t zbud_eph_zombie_atomic;
-static atomic_t zbud_pers_zombie_atomic;
-
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#define zdfs debugfs_create_size_t
-#define zdfs64 debugfs_create_u64
-static int zbud_debugfs_init(void)
-{
- struct dentry *root = debugfs_create_dir("zbud", NULL);
- if (root == NULL)
- return -ENXIO;
-
- /*
- * would be nice to dump the sizes of the unbuddied
- * arrays, like was done with sysfs, but it doesn't
- * look like debugfs is flexible enough to do that
- */
- zdfs64("eph_zbytes", S_IRUGO, root, &zbud_eph_zbytes);
- zdfs64("eph_cumul_zbytes", S_IRUGO, root, &zbud_eph_cumul_zbytes);
- zdfs64("pers_zbytes", S_IRUGO, root, &zbud_pers_zbytes);
- zdfs64("pers_cumul_zbytes", S_IRUGO, root, &zbud_pers_cumul_zbytes);
- zdfs("eph_cumul_zpages", S_IRUGO, root, &zbud_eph_cumul_zpages);
- zdfs("eph_evicted_pageframes", S_IRUGO, root,
- &zbud_eph_evicted_pageframes);
- zdfs("eph_zpages", S_IRUGO, root, &zbud_eph_zpages);
- zdfs("eph_pageframes", S_IRUGO, root, &zbud_eph_pageframes);
- zdfs("eph_buddied_count", S_IRUGO, root, &zbud_eph_buddied_count);
- zdfs("eph_unbuddied_count", S_IRUGO, root, &zbud_eph_unbuddied_count);
- zdfs("pers_cumul_zpages", S_IRUGO, root, &zbud_pers_cumul_zpages);
- zdfs("pers_evicted_pageframes", S_IRUGO, root,
- &zbud_pers_evicted_pageframes);
- zdfs("pers_zpages", S_IRUGO, root, &zbud_pers_zpages);
- zdfs("pers_pageframes", S_IRUGO, root, &zbud_pers_pageframes);
- zdfs("pers_buddied_count", S_IRUGO, root, &zbud_pers_buddied_count);
- zdfs("pers_unbuddied_count", S_IRUGO, root, &zbud_pers_unbuddied_count);
- zdfs("pers_zombie_count", S_IRUGO, root, &zbud_pers_zombie_count);
- return 0;
-}
-#undef zdfs
-#undef zdfs64
-#else
-static inline int zbud_debugfs_init(void)
-{
- return 0;
-}
-#endif
-
-/* protects the buddied list and all unbuddied lists */
-static DEFINE_SPINLOCK(zbud_eph_lists_lock);
-static DEFINE_SPINLOCK(zbud_pers_lists_lock);
-
-struct zbud_unbuddied {
- struct list_head list;
- unsigned count;
-};
-
-/* list N contains pages with N chunks USED and NCHUNKS-N unused */
-/* element 0 is never used but optimizing that isn't worth it */
-static struct zbud_unbuddied zbud_eph_unbuddied[NCHUNKS];
-static struct zbud_unbuddied zbud_pers_unbuddied[NCHUNKS];
-static LIST_HEAD(zbud_eph_lru_list);
-static LIST_HEAD(zbud_pers_lru_list);
-static LIST_HEAD(zbud_eph_buddied_list);
-static LIST_HEAD(zbud_pers_buddied_list);
-static LIST_HEAD(zbud_eph_zombie_list);
-static LIST_HEAD(zbud_pers_zombie_list);
-
-/*
- * Given a struct page, transform it to a zbudpage so that it can be
- * used by zbud and initialize fields as necessary.
- */
-static inline struct zbudpage *zbud_init_zbudpage(struct page *page, bool eph)
-{
- struct zbudpage *zbudpage = (struct zbudpage *)page;
-
- BUG_ON(page == NULL);
- INIT_LIST_HEAD(&zbudpage->budlist);
- INIT_LIST_HEAD(&zbudpage->lru);
- zbudpage->zbud0_size = 0;
- zbudpage->zbud1_size = 0;
- zbudpage->unevictable = 0;
- if (eph)
- zbud_eph_pageframes++;
- else
- zbud_pers_pageframes++;
- return zbudpage;
-}
-
-/* "Transform" a zbudpage back to a struct page suitable to free. */
-static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage,
- bool eph)
-{
- struct page *page = (struct page *)zbudpage;
-
- BUG_ON(!list_empty(&zbudpage->budlist));
- BUG_ON(!list_empty(&zbudpage->lru));
- BUG_ON(zbudpage->zbud0_size != 0);
- BUG_ON(zbudpage->zbud1_size != 0);
- BUG_ON(!PageLocked(page));
- BUG_ON(zbudpage->unevictable != 0);
- BUG_ON(zbudpage_is_dying(zbudpage));
- BUG_ON(zbudpage_is_zombie(zbudpage));
- if (eph)
- zbud_eph_pageframes--;
- else
- zbud_pers_pageframes--;
- zbudpage_spin_unlock(zbudpage);
- page_mapcount_reset(page);
- init_page_count(page);
- page->index = 0;
- return page;
-}
-
-/* Mark a zbud as unused and do accounting */
-static inline void zbud_unuse_zbud(struct zbudpage *zbudpage,
- int budnum, bool eph)
-{
- unsigned size;
-
- BUG_ON(!zbudpage_is_locked(zbudpage));
- if (budnum == 0) {
- size = zbudpage->zbud0_size;
- zbudpage->zbud0_size = 0;
- } else {
- size = zbudpage->zbud1_size;
- zbudpage->zbud1_size = 0;
- }
- if (eph) {
- zbud_eph_zbytes -= size;
- zbud_eph_zpages--;
- } else {
- zbud_pers_zbytes -= size;
- zbud_pers_zpages--;
- }
-}
-
-/*
- * Given a zbudpage/budnum/size, a tmem handle, and a kmapped pointer
- * to some data, set up the zbud appropriately including data copying
- * and accounting. Note that if cdata is NULL, the data copying is
- * skipped. (This is useful for lazy writes such as for RAMster.)
- */
-static void zbud_init_zbud(struct zbudpage *zbudpage, struct tmem_handle *th,
- bool eph, void *cdata,
- unsigned budnum, unsigned size)
-{
- char *to;
- void *zbpg;
- struct tmem_handle *to_th;
- unsigned nchunks = zbud_size_to_chunks(size);
-
- BUG_ON(!zbudpage_is_locked(zbudpage));
- zbpg = kmap_zbudpage_atomic(zbudpage);
- to = zbud_data(zbpg, budnum, size);
- to_th = (struct tmem_handle *)to;
- to_th->index = th->index;
- to_th->oid = th->oid;
- to_th->pool_id = th->pool_id;
- to_th->client_id = th->client_id;
- to += sizeof(struct tmem_handle);
- if (cdata != NULL)
- memcpy(to, cdata, size - sizeof(struct tmem_handle));
- kunmap_zbudpage_atomic(zbpg);
- if (budnum == 0)
- zbudpage->zbud0_size = size;
- else
- zbudpage->zbud1_size = size;
- if (eph) {
- zbud_eph_cumul_chunk_counts[nchunks]++;
- zbud_eph_zpages++;
- zbud_eph_cumul_zpages++;
- zbud_eph_zbytes += size;
- zbud_eph_cumul_zbytes += size;
- } else {
- zbud_pers_cumul_chunk_counts[nchunks]++;
- zbud_pers_zpages++;
- zbud_pers_cumul_zpages++;
- zbud_pers_zbytes += size;
- zbud_pers_cumul_zbytes += size;
- }
-}
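/*
 * Illustrative in-bud layout, as implied by zbud_init_zbud() above and
 * not spelled out in the original file: within a bud of stored size S,
 * the tmem_handle comes first and the compressed payload follows, so
 * only S - sizeof(struct tmem_handle) bytes of payload fit:
 *
 *	| tmem_handle | compressed payload ...           |
 *	0             sizeof(struct tmem_handle)         S
 */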
-
-/*
- * Given a locked dying zbudpage, read out the tmem handles from the data,
- * unlock the page, then use the handles to tell tmem to flush out its
- * references.
- */
-static void zbud_evict_tmem(struct zbudpage *zbudpage)
-{
- int i, j;
- uint32_t pool_id[2], client_id[2];
- uint32_t index[2];
- struct tmem_oid oid[2];
- struct tmem_pool *pool;
- void *zbpg;
- struct tmem_handle *th;
- unsigned size;
-
- /* read out the tmem handles from the data and set aside */
- zbpg = kmap_zbudpage_atomic(zbudpage);
- for (i = 0, j = 0; i < 2; i++) {
- size = (i == 0) ? zbudpage->zbud0_size : zbudpage->zbud1_size;
- if (size) {
- th = (struct tmem_handle *)zbud_data(zbpg, i, size);
- client_id[j] = th->client_id;
- pool_id[j] = th->pool_id;
- oid[j] = th->oid;
- index[j] = th->index;
- j++;
- zbud_unuse_zbud(zbudpage, i, true);
- }
- }
- kunmap_zbudpage_atomic(zbpg);
- zbudpage_spin_unlock(zbudpage);
-	/* zbudpage is now unlocked and dying... tell tmem to flush pointers */
- for (i = 0; i < j; i++) {
- pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
- if (pool != NULL) {
- tmem_flush_page(pool, &oid[i], index[i]);
- zcache_put_pool(pool);
- }
- }
-}
-
-/*
- * Externally callable zbud handling routines.
- */
-
-/*
- * Return the maximum size of compressed page that can be stored (zbud
- * secretly sets aside space for the tmem handle).
- */
-unsigned int zbud_max_buddy_size(void)
-{
- return zbud_max_size() - sizeof(struct tmem_handle);
-}
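/*
 * Illustrative numbers, not part of the original file: with 4K pages
 * this works out to 4032 - sizeof(struct tmem_handle). Per the header
 * comment, a caller must gate every store on it, e.g.:
 *
 *	if (clen > zbud_max_buddy_size())
 *		return -EINVAL;		(clen is a hypothetical name)
 */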
-
-/*
- * Given a zbud reference, free the corresponding zbud from all lists,
- * mark it as unused, do accounting, and if the freeing of the zbud
- * frees up an entire pageframe, return it to the caller (else NULL).
- */
-struct page *zbud_free_and_delist(struct zbudref *zref, bool eph,
- unsigned int *zsize, unsigned int *zpages)
-{
- unsigned long budnum = zbudref_budnum(zref);
- struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
- struct page *page = NULL;
- unsigned chunks, bud_size, other_bud_size;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
- struct zbud_unbuddied *unbud =
- eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;
-
- spin_lock(lists_lock);
- zbudpage_spin_lock(zbudpage);
- if (zbudpage_is_dying(zbudpage)) {
- /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
- *zpages = 0;
- *zsize = 0;
- goto out;
- }
- if (budnum == 0) {
- bud_size = zbudpage->zbud0_size;
- other_bud_size = zbudpage->zbud1_size;
- } else {
- bud_size = zbudpage->zbud1_size;
- other_bud_size = zbudpage->zbud0_size;
- }
- *zsize = bud_size - sizeof(struct tmem_handle);
- *zpages = 1;
- zbud_unuse_zbud(zbudpage, budnum, eph);
- if (other_bud_size == 0) { /* was unbuddied: unlist and free */
-		chunks = zbud_size_to_chunks(bud_size);
- if (zbudpage_is_zombie(zbudpage)) {
- if (eph)
-				zbud_eph_zombie_count =
- atomic_dec_return(&zbud_eph_zombie_atomic);
- else
- zbud_pers_zombie_count =
- atomic_dec_return(&zbud_pers_zombie_atomic);
- zbudpage_clear_zombie(zbudpage);
- } else {
- BUG_ON(list_empty(&unbud[chunks].list));
- list_del_init(&zbudpage->budlist);
- unbud[chunks].count--;
- }
- list_del_init(&zbudpage->lru);
- spin_unlock(lists_lock);
- if (eph)
- zbud_eph_unbuddied_count--;
- else
- zbud_pers_unbuddied_count--;
- page = zbud_unuse_zbudpage(zbudpage, eph);
- } else { /* was buddied: move remaining buddy to unbuddied list */
-		chunks = zbud_size_to_chunks(other_bud_size);
- if (!zbudpage_is_zombie(zbudpage)) {
- list_del_init(&zbudpage->budlist);
- list_add_tail(&zbudpage->budlist, &unbud[chunks].list);
- unbud[chunks].count++;
- }
- if (eph) {
- zbud_eph_buddied_count--;
- zbud_eph_unbuddied_count++;
- } else {
- zbud_pers_unbuddied_count++;
- zbud_pers_buddied_count--;
- }
- /* don't mess with lru, no need to move it */
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
- }
-out:
- return page;
-}
-
-/*
- * Given a tmem handle, and a kmapped pointer to compressed data of
- * the given size, try to find an unbuddied zbudpage in which to
- * create a zbud. If found, put it there, mark the zbudpage unevictable,
- * and return a zbudref to it. Else return NULL.
- */
-struct zbudref *zbud_match_prep(struct tmem_handle *th, bool eph,
- void *cdata, unsigned size)
-{
- struct zbudpage *zbudpage = NULL, *zbudpage2;
- unsigned long budnum = 0UL;
- unsigned nchunks;
- int i, found_good_buddy = 0;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
- struct zbud_unbuddied *unbud =
- eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;
-
- size += sizeof(struct tmem_handle);
- nchunks = zbud_size_to_chunks(size);
- for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
- spin_lock(lists_lock);
- if (!list_empty(&unbud[i].list)) {
- list_for_each_entry_safe(zbudpage, zbudpage2,
- &unbud[i].list, budlist) {
- if (zbudpage_spin_trylock(zbudpage)) {
- found_good_buddy = i;
- goto found_unbuddied;
- }
- }
- }
- spin_unlock(lists_lock);
- }
- zbudpage = NULL;
- goto out;
-
-found_unbuddied:
- BUG_ON(!zbudpage_is_locked(zbudpage));
- BUG_ON(!((zbudpage->zbud0_size == 0) ^ (zbudpage->zbud1_size == 0)));
- if (zbudpage->zbud0_size == 0)
- budnum = 0UL;
- else if (zbudpage->zbud1_size == 0)
- budnum = 1UL;
- list_del_init(&zbudpage->budlist);
- if (eph) {
- list_add_tail(&zbudpage->budlist, &zbud_eph_buddied_list);
- unbud[found_good_buddy].count--;
- zbud_eph_unbuddied_count--;
- zbud_eph_buddied_count++;
- /* "promote" raw zbudpage to most-recently-used */
- list_del_init(&zbudpage->lru);
- list_add_tail(&zbudpage->lru, &zbud_eph_lru_list);
- } else {
- list_add_tail(&zbudpage->budlist, &zbud_pers_buddied_list);
- unbud[found_good_buddy].count--;
- zbud_pers_unbuddied_count--;
- zbud_pers_buddied_count++;
- /* "promote" raw zbudpage to most-recently-used */
- list_del_init(&zbudpage->lru);
- list_add_tail(&zbudpage->lru, &zbud_pers_lru_list);
- }
- zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size);
- zbudpage->unevictable++;
- BUG_ON(zbudpage->unevictable == 3);
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
-out:
- return zbudpage_to_zbudref(zbudpage, budnum);
-}
-
-/*
- * Given a tmem handle, and a kmapped pointer to compressed data of
- * the given size, and a newly allocated struct page, create an unevictable
- * zbud in that new page and return a zbudref to it.
- */
-struct zbudref *zbud_create_prep(struct tmem_handle *th, bool eph,
- void *cdata, unsigned size,
- struct page *newpage)
-{
- struct zbudpage *zbudpage;
- unsigned long budnum = 0;
- unsigned nchunks;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
- struct zbud_unbuddied *unbud =
- eph ? zbud_eph_unbuddied : zbud_pers_unbuddied;
-
-#if 0
- /* this may be worth it later to support decompress-in-place? */
- static unsigned long counter;
- budnum = counter++ & 1; /* alternate using zbud0 and zbud1 */
-#endif
-
- if (size > zbud_max_buddy_size())
- return NULL;
- if (newpage == NULL)
- return NULL;
-
- size += sizeof(struct tmem_handle);
-	nchunks = zbud_size_to_chunks(size);
- spin_lock(lists_lock);
- zbudpage = zbud_init_zbudpage(newpage, eph);
- zbudpage_spin_lock(zbudpage);
- list_add_tail(&zbudpage->budlist, &unbud[nchunks].list);
- if (eph) {
- list_add_tail(&zbudpage->lru, &zbud_eph_lru_list);
- zbud_eph_unbuddied_count++;
- } else {
- list_add_tail(&zbudpage->lru, &zbud_pers_lru_list);
- zbud_pers_unbuddied_count++;
- }
- unbud[nchunks].count++;
- zbud_init_zbud(zbudpage, th, eph, cdata, budnum, size);
- zbudpage->unevictable++;
- BUG_ON(zbudpage->unevictable == 3);
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
- return zbudpage_to_zbudref(zbudpage, budnum);
-}
-
-/*
- * Finish creation of a zbud by dropping its hold on the pageframe; once
- * any zbud being created in parallel has also finished, the pageframe
- * becomes evictable again.
- */
-void zbud_create_finish(struct zbudref *zref, bool eph)
-{
- struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
-
- spin_lock(lists_lock);
- zbudpage_spin_lock(zbudpage);
- BUG_ON(zbudpage_is_dying(zbudpage));
- zbudpage->unevictable--;
- BUG_ON((int)zbudpage->unevictable < 0);
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
-}
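/*
 * Illustrative sketch, not part of the original file: the intended
 * three-step put path, per the comments above. zbud never allocates
 * pages itself, so the caller supplies the fresh page; the function
 * name and the GFP flags here are hypothetical.
 */
static struct zbudref *example_put(struct tmem_handle *th, bool eph,
				   void *cdata, unsigned size)
{
	struct zbudref *zref;
	struct page *newpage;

	/* first try to buddy into an existing unbuddied pageframe */
	zref = zbud_match_prep(th, eph, cdata, size);
	if (zref == NULL) {
		/* no buddy found: spend a fresh page (caller's job) */
		newpage = alloc_page(GFP_ATOMIC);
		zref = zbud_create_prep(th, eph, cdata, size, newpage);
		if (zref == NULL && newpage != NULL)
			__free_page(newpage);	/* zpage too big to store */
	}
	if (zref != NULL)
		zbud_create_finish(zref, eph);	/* make it evictable */
	return zref;
}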
-
-/*
- * Given a zbudref and a struct page, decompress the data from
- * the zbud into the physical page represented by the struct page
- * by upcalling to the decompress function passed by the caller.
- */
-int zbud_decompress(struct page *data_page, struct zbudref *zref, bool eph,
- void (*decompress)(char *, unsigned int, char *))
-{
- struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
- unsigned long budnum = zbudref_budnum(zref);
- void *zbpg;
- char *to_va, *from_va;
- unsigned size;
- int ret = -1;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
-
- spin_lock(lists_lock);
- zbudpage_spin_lock(zbudpage);
- if (zbudpage_is_dying(zbudpage)) {
- /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */
- goto out;
- }
- zbpg = kmap_zbudpage_atomic(zbudpage);
- to_va = kmap_atomic(data_page);
- if (budnum == 0)
- size = zbudpage->zbud0_size;
- else
- size = zbudpage->zbud1_size;
- BUG_ON(size == 0 || size > zbud_max_size());
- from_va = zbud_data(zbpg, budnum, size);
- from_va += sizeof(struct tmem_handle);
- size -= sizeof(struct tmem_handle);
- decompress(from_va, size, to_va);
- kunmap_atomic(to_va);
- kunmap_zbudpage_atomic(zbpg);
- ret = 0;
-out:
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
- return ret;
-}
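/*
 * Illustrative call, not part of the original file: zbud is agnostic
 * about the compression algorithm, so the caller supplies the inflate
 * routine. A hypothetical zcache-side use might look like:
 *
 *	ret = zbud_decompress(page, zref, true, my_decompress);
 *
 * where my_decompress(from_va, size, to_va) inflates size bytes at
 * from_va into the page mapped at to_va.
 */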
-
-/*
- * Given a zbudref and a kernel pointer, copy the data from
- * the zbud to the kernel pointer.
- */
-int zbud_copy_from_zbud(char *to_va, struct zbudref *zref,
- size_t *sizep, bool eph)
-{
- struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
- unsigned long budnum = zbudref_budnum(zref);
- void *zbpg;
- char *from_va;
- unsigned size;
- int ret = -1;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
-
- spin_lock(lists_lock);
- zbudpage_spin_lock(zbudpage);
- if (zbudpage_is_dying(zbudpage)) {
- /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */
- goto out;
- }
- zbpg = kmap_zbudpage_atomic(zbudpage);
- if (budnum == 0)
- size = zbudpage->zbud0_size;
- else
- size = zbudpage->zbud1_size;
- BUG_ON(size == 0 || size > zbud_max_size());
- from_va = zbud_data(zbpg, budnum, size);
- from_va += sizeof(struct tmem_handle);
- size -= sizeof(struct tmem_handle);
- *sizep = size;
- memcpy(to_va, from_va, size);
-
- kunmap_zbudpage_atomic(zbpg);
- ret = 0;
-out:
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
- return ret;
-}
-
-/*
- * Given a zbudref and a kernel pointer, copy the data from
- * the kernel pointer to the zbud.
- */
-int zbud_copy_to_zbud(struct zbudref *zref, char *from_va, bool eph)
-{
- struct zbudpage *zbudpage = zbudref_to_zbudpage(zref);
- unsigned long budnum = zbudref_budnum(zref);
- void *zbpg;
- char *to_va;
- unsigned size;
- int ret = -1;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
-
- spin_lock(lists_lock);
- zbudpage_spin_lock(zbudpage);
- if (zbudpage_is_dying(zbudpage)) {
- /* ignore dying zbudpage... see zbud_evict_pageframe_lru() */
- goto out;
- }
- zbpg = kmap_zbudpage_atomic(zbudpage);
- if (budnum == 0)
- size = zbudpage->zbud0_size;
- else
- size = zbudpage->zbud1_size;
- BUG_ON(size == 0 || size > zbud_max_size());
- to_va = zbud_data(zbpg, budnum, size);
- to_va += sizeof(struct tmem_handle);
- size -= sizeof(struct tmem_handle);
- memcpy(to_va, from_va, size);
-
- kunmap_zbudpage_atomic(zbpg);
- ret = 0;
-out:
- zbudpage_spin_unlock(zbudpage);
- spin_unlock(lists_lock);
- return ret;
-}
-
-/*
- * Choose an ephemeral LRU zbudpage that is evictable (not locked), ensure
- * there are no references to it remaining, and return the now unused
- * (and re-init'ed) struct page and the total amount of compressed
- * data that was evicted.
- */
-struct page *zbud_evict_pageframe_lru(unsigned int *zsize, unsigned int *zpages)
-{
- struct zbudpage *zbudpage = NULL, *zbudpage2;
- struct zbud_unbuddied *unbud = zbud_eph_unbuddied;
- struct page *page = NULL;
- bool irqs_disabled = irqs_disabled();
-
- /*
- * Since this can be called indirectly from cleancache_put, which
- * has interrupts disabled, as well as frontswap_put, which does not,
- * we need to be able to handle both cases, even though it is ugly.
- */
- if (irqs_disabled)
- spin_lock(&zbud_eph_lists_lock);
- else
- spin_lock_bh(&zbud_eph_lists_lock);
- *zsize = 0;
- if (list_empty(&zbud_eph_lru_list))
- goto unlock_out;
- list_for_each_entry_safe(zbudpage, zbudpage2, &zbud_eph_lru_list, lru) {
- /* skip a locked zbudpage */
- if (unlikely(!zbudpage_spin_trylock(zbudpage)))
- continue;
- /* skip an unevictable zbudpage */
- if (unlikely(zbudpage->unevictable != 0)) {
- zbudpage_spin_unlock(zbudpage);
- continue;
- }
- /* got a locked evictable page */
- goto evict_page;
-	}
-unlock_out:
- /* no unlocked evictable pages, give up */
- if (irqs_disabled)
- spin_unlock(&zbud_eph_lists_lock);
- else
- spin_unlock_bh(&zbud_eph_lists_lock);
- goto out;
-
-evict_page:
- list_del_init(&zbudpage->budlist);
- list_del_init(&zbudpage->lru);
- zbudpage_set_dying(zbudpage);
- /*
- * the zbudpage is now "dying" and attempts to read, write,
- * or delete data from it will be ignored
- */
- if (zbudpage->zbud0_size != 0 && zbudpage->zbud1_size != 0) {
- *zsize = zbudpage->zbud0_size + zbudpage->zbud1_size -
- (2 * sizeof(struct tmem_handle));
- *zpages = 2;
- } else if (zbudpage->zbud0_size != 0) {
- unbud[zbud_size_to_chunks(zbudpage->zbud0_size)].count--;
- *zsize = zbudpage->zbud0_size - sizeof(struct tmem_handle);
- *zpages = 1;
- } else if (zbudpage->zbud1_size != 0) {
- unbud[zbud_size_to_chunks(zbudpage->zbud1_size)].count--;
- *zsize = zbudpage->zbud1_size - sizeof(struct tmem_handle);
- *zpages = 1;
- } else {
- BUG();
- }
- spin_unlock(&zbud_eph_lists_lock);
- zbud_eph_evicted_pageframes++;
- if (*zpages == 1)
- zbud_eph_unbuddied_count--;
- else
- zbud_eph_buddied_count--;
- zbud_evict_tmem(zbudpage);
- zbudpage_spin_lock(zbudpage);
- zbudpage_clear_dying(zbudpage);
- page = zbud_unuse_zbudpage(zbudpage, true);
- if (!irqs_disabled)
- local_bh_enable();
-out:
- return page;
-}
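/*
 * Illustrative sketch, not part of the original file: a shrinker-style
 * caller can reclaim nr pageframes in roughly LRU order; each page
 * returned has already been untransformed back to a normal struct
 * page, so it may simply be freed. The name here is hypothetical.
 */
static void example_shrink_eph(int nr)
{
	unsigned int zsize, zpages;
	struct page *page;

	while (nr-- > 0) {
		page = zbud_evict_pageframe_lru(&zsize, &zpages);
		if (page == NULL)
			break;		/* nothing evictable remained */
		__free_page(page);
	}
}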
-
-/*
- * Choose a persistent LRU zbudpage that is evictable (not locked), zombify it,
- * read the tmem_handle(s) out of it into the passed array, and return the
- * number of zbuds. Caller must perform necessary tmem functions and,
- * indirectly, zbud functions to fetch any valid data and cause the
- * now-zombified zbudpage to eventually be freed. We track the zombified
- * zbudpage count so it is possible to observe if there is a leak.
- * FIXME: describe (ramster) case where data pointers are passed in for memcpy
- */
-unsigned int zbud_make_zombie_lru(struct tmem_handle *th, unsigned char **data,
- unsigned int *zsize, bool eph)
-{
-	struct zbudpage *zbudpage = NULL, *zbudpage2;
- struct tmem_handle *thfrom;
- char *from_va;
- void *zbpg;
- unsigned size;
- int ret = 0, i;
- spinlock_t *lists_lock =
- eph ? &zbud_eph_lists_lock : &zbud_pers_lists_lock;
- struct list_head *lru_list =
- eph ? &zbud_eph_lru_list : &zbud_pers_lru_list;
-
- spin_lock_bh(lists_lock);
- if (list_empty(lru_list))
- goto out;
-	list_for_each_entry_safe(zbudpage, zbudpage2, lru_list, lru) {
- /* skip a locked zbudpage */
- if (unlikely(!zbudpage_spin_trylock(zbudpage)))
- continue;
- /* skip an unevictable zbudpage */
- if (unlikely(zbudpage->unevictable != 0)) {
- zbudpage_spin_unlock(zbudpage);
- continue;
- }
- /* got a locked evictable page */
- goto zombify_page;
- }
- /* no unlocked evictable pages, give up */
- goto out;
-
-zombify_page:
-	/* got a locked evictable page, zombify it */
- list_del_init(&zbudpage->budlist);
- zbudpage_set_zombie(zbudpage);
- /* FIXME what accounting do I need to do here? */
- list_del_init(&zbudpage->lru);
- if (eph) {
- list_add_tail(&zbudpage->lru, &zbud_eph_zombie_list);
- zbud_eph_zombie_count =
- atomic_inc_return(&zbud_eph_zombie_atomic);
- } else {
- list_add_tail(&zbudpage->lru, &zbud_pers_zombie_list);
- zbud_pers_zombie_count =
- atomic_inc_return(&zbud_pers_zombie_atomic);
- }
- /* FIXME what accounting do I need to do here? */
- zbpg = kmap_zbudpage_atomic(zbudpage);
- for (i = 0; i < 2; i++) {
- size = (i == 0) ? zbudpage->zbud0_size : zbudpage->zbud1_size;
- if (size) {
- from_va = zbud_data(zbpg, i, size);
- thfrom = (struct tmem_handle *)from_va;
- from_va += sizeof(struct tmem_handle);
- size -= sizeof(struct tmem_handle);
- if (th != NULL)
- th[ret] = *thfrom;
- if (data != NULL)
- memcpy(data[ret], from_va, size);
- if (zsize != NULL)
- *zsize++ = size;
- ret++;
- }
- }
- kunmap_zbudpage_atomic(zbpg);
- zbudpage_spin_unlock(zbudpage);
-out:
- spin_unlock_bh(lists_lock);
- return ret;
-}
-
-void zbud_init(void)
-{
- int i;
-
- zbud_debugfs_init();
- BUG_ON((sizeof(struct tmem_handle) * 2 > CHUNK_SIZE));
- BUG_ON(sizeof(struct zbudpage) > sizeof(struct page));
- for (i = 0; i < NCHUNKS; i++) {
- INIT_LIST_HEAD(&zbud_eph_unbuddied[i].list);
- INIT_LIST_HEAD(&zbud_pers_unbuddied[i].list);
- }
-}