diff options
Diffstat (limited to 'drivers/staging/zcache/ramster/ramster.c')
-rw-r--r-- | drivers/staging/zcache/ramster/ramster.c | 925 |
1 files changed, 0 insertions, 925 deletions
diff --git a/drivers/staging/zcache/ramster/ramster.c b/drivers/staging/zcache/ramster/ramster.c deleted file mode 100644 index a937ce1fa27a..000000000000 --- a/drivers/staging/zcache/ramster/ramster.c +++ /dev/null @@ -1,925 +0,0 @@ -/* - * ramster.c - * - * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp. - * - * RAMster implements peer-to-peer transcendent memory, allowing a "cluster" of - * kernels to dynamically pool their RAM so that a RAM-hungry workload on one - * machine can temporarily and transparently utilize RAM on another machine - * which is presumably idle or running a non-RAM-hungry workload. - * - * RAMster combines a clustering and messaging foundation based on the ocfs2 - * cluster layer with the in-kernel compression implementation of zcache, and - * adds code to glue them together. When a page is "put" to RAMster, it is - * compressed and stored locally. Periodically, a thread will "remotify" these - * pages by sending them via messages to a remote machine. When the page is - * later needed as indicated by a page fault, a "get" is issued. If the data - * is local, it is uncompressed and the fault is resolved. If the data is - * remote, a message is sent to fetch the data and the faulting thread sleeps; - * when the data arrives, the thread awakens, the data is decompressed and - * the fault is resolved. - - * As of V5, clusters up to eight nodes are supported; each node can remotify - * pages to one specified node, so clusters can be configured as clients to - * a "memory server". Some simple policy is in place that will need to be - * refined over time. Larger clusters and fault-resistant protocols can also - * be added over time. - */ - -#include <linux/module.h> -#include <linux/cpu.h> -#include <linux/highmem.h> -#include <linux/list.h> -#include <linux/lzo.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/atomic.h> -#include <linux/frontswap.h> -#include "../tmem.h" -#include "../zcache.h" -#include "../zbud.h" -#include "ramster.h" -#include "ramster_nodemanager.h" -#include "tcp.h" -#include "debug.h" - -#define RAMSTER_TESTING - -#ifndef CONFIG_SYSFS -#error "ramster needs sysfs to define cluster nodes to use" -#endif - -static bool use_cleancache __read_mostly; -static bool use_frontswap __read_mostly; -static bool use_frontswap_exclusive_gets __read_mostly; - -/* These must be sysfs not debugfs as they are checked/used by userland!! */ -static unsigned long ramster_interface_revision __read_mostly = - R2NM_API_VERSION; /* interface revision must match userspace! */ -static unsigned long ramster_pers_remotify_enable __read_mostly; -static unsigned long ramster_eph_remotify_enable __read_mostly; -static atomic_t ramster_remote_pers_pages = ATOMIC_INIT(0); -#define MANUAL_NODES 8 -static bool ramster_nodes_manual_up[MANUAL_NODES] __read_mostly; -static int ramster_remote_target_nodenum __read_mostly = -1; - -/* Used by this code. */ -long ramster_flnodes; -/* FIXME frontswap selfshrinking knobs in debugfs? */ - -static LIST_HEAD(ramster_rem_op_list); -static DEFINE_SPINLOCK(ramster_rem_op_list_lock); -static DEFINE_PER_CPU(struct ramster_preload, ramster_preloads); - -static DEFINE_PER_CPU(unsigned char *, ramster_remoteputmem1); -static DEFINE_PER_CPU(unsigned char *, ramster_remoteputmem2); - -static struct kmem_cache *ramster_flnode_cache __read_mostly; - -static struct flushlist_node *ramster_flnode_alloc(struct tmem_pool *pool) -{ - struct flushlist_node *flnode = NULL; - struct ramster_preload *kp; - - kp = &__get_cpu_var(ramster_preloads); - flnode = kp->flnode; - BUG_ON(flnode == NULL); - kp->flnode = NULL; - inc_ramster_flnodes(); - return flnode; -} - -/* the "flush list" asynchronously collects pages to remotely flush */ -#define FLUSH_ENTIRE_OBJECT ((uint32_t)-1) -static void ramster_flnode_free(struct flushlist_node *flnode, - struct tmem_pool *pool) -{ - dec_ramster_flnodes(); - BUG_ON(ramster_flnodes < 0); - kmem_cache_free(ramster_flnode_cache, flnode); -} - -int ramster_do_preload_flnode(struct tmem_pool *pool) -{ - struct ramster_preload *kp; - struct flushlist_node *flnode; - int ret = -ENOMEM; - - BUG_ON(!irqs_disabled()); - if (unlikely(ramster_flnode_cache == NULL)) - BUG(); - kp = &__get_cpu_var(ramster_preloads); - flnode = kmem_cache_alloc(ramster_flnode_cache, GFP_ATOMIC); - if (unlikely(flnode == NULL) && kp->flnode == NULL) - BUG(); /* FIXME handle more gracefully, but how??? */ - else if (kp->flnode == NULL) - kp->flnode = flnode; - else - kmem_cache_free(ramster_flnode_cache, flnode); - return ret; -} -EXPORT_SYMBOL_GPL(ramster_do_preload_flnode); - -/* - * Called by the message handler after a (still compressed) page has been - * fetched from the remote machine in response to an "is_remote" tmem_get - * or persistent tmem_localify. For a tmem_get, "extra" is the address of - * the page that is to be filled to successfully resolve the tmem_get; for - * a (persistent) tmem_localify, "extra" is NULL (as the data is placed only - * in the local zcache). "data" points to "size" bytes of (compressed) data - * passed in the message. In the case of a persistent remote get, if - * pre-allocation was successful (see ramster_repatriate_preload), the page - * is placed into both local zcache and at "extra". - */ -int ramster_localify(int pool_id, struct tmem_oid *oidp, uint32_t index, - char *data, unsigned int size, void *extra) -{ - int ret = -ENOENT; - unsigned long flags; - struct tmem_pool *pool; - bool eph, delete = false; - void *pampd, *saved_hb; - struct tmem_obj *obj; - - pool = zcache_get_pool_by_id(LOCAL_CLIENT, pool_id); - if (unlikely(pool == NULL)) - /* pool doesn't exist anymore */ - goto out; - eph = is_ephemeral(pool); - local_irq_save(flags); /* FIXME: maybe only disable softirqs? */ - pampd = tmem_localify_get_pampd(pool, oidp, index, &obj, &saved_hb); - if (pampd == NULL) { - /* hmmm... must have been a flush while waiting */ -#ifdef RAMSTER_TESTING - pr_err("UNTESTED pampd==NULL in ramster_localify\n"); -#endif - if (eph) - inc_ramster_remote_eph_pages_unsucc_get(); - else - inc_ramster_remote_pers_pages_unsucc_get(); - obj = NULL; - goto finish; - } else if (unlikely(!pampd_is_remote(pampd))) { - /* hmmm... must have been a dup put while waiting */ -#ifdef RAMSTER_TESTING - pr_err("UNTESTED dup while waiting in ramster_localify\n"); -#endif - if (eph) - inc_ramster_remote_eph_pages_unsucc_get(); - else - inc_ramster_remote_pers_pages_unsucc_get(); - obj = NULL; - pampd = NULL; - ret = -EEXIST; - goto finish; - } else if (size == 0) { - /* no remote data, delete the local is_remote pampd */ - pampd = NULL; - if (eph) - inc_ramster_remote_eph_pages_unsucc_get(); - else - BUG(); - delete = true; - goto finish; - } - if (pampd_is_intransit(pampd)) { - /* - * a pampd is marked intransit if it is remote and space has - * been allocated for it locally (note, only happens for - * persistent pages, in which case the remote copy is freed) - */ - BUG_ON(eph); - pampd = pampd_mask_intransit_and_remote(pampd); - zbud_copy_to_zbud(pampd, data, size); - } else { - /* - * setting pampd to NULL tells tmem_localify_finish to leave - * pampd alone... meaning it is left pointing to the - * remote copy - */ - pampd = NULL; - obj = NULL; - } - /* - * but in all cases, we decompress direct-to-memory to complete - * the remotify and return success - */ - BUG_ON(extra == NULL); - zcache_decompress_to_page(data, size, (struct page *)extra); - if (eph) - inc_ramster_remote_eph_pages_succ_get(); - else - inc_ramster_remote_pers_pages_succ_get(); - ret = 0; -finish: - tmem_localify_finish(obj, index, pampd, saved_hb, delete); - zcache_put_pool(pool); - local_irq_restore(flags); -out: - return ret; -} - -void ramster_pampd_new_obj(struct tmem_obj *obj) -{ - obj->extra = NULL; -} - -void ramster_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj, - bool pool_destroy) -{ - struct flushlist_node *flnode; - - BUG_ON(preemptible()); - if (obj->extra == NULL) - return; - if (pool_destroy && is_ephemeral(pool)) - /* FIXME don't bother with remote eph data for now */ - return; - BUG_ON(!pampd_is_remote(obj->extra)); - flnode = ramster_flnode_alloc(pool); - flnode->xh.client_id = pampd_remote_node(obj->extra); - flnode->xh.pool_id = pool->pool_id; - flnode->xh.oid = obj->oid; - flnode->xh.index = FLUSH_ENTIRE_OBJECT; - flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_OBJ; - spin_lock(&ramster_rem_op_list_lock); - list_add(&flnode->rem_op.list, &ramster_rem_op_list); - spin_unlock(&ramster_rem_op_list_lock); -} - -/* - * Called on a remote persistent tmem_get to attempt to preallocate - * local storage for the data contained in the remote persistent page. - * If successfully preallocated, returns the pampd, marked as remote and - * in_transit. Else returns NULL. Note that the appropriate tmem data - * structure must be locked. - */ -void *ramster_pampd_repatriate_preload(void *pampd, struct tmem_pool *pool, - struct tmem_oid *oidp, uint32_t index, - bool *intransit) -{ - int clen = pampd_remote_size(pampd), c; - void *ret_pampd = NULL; - unsigned long flags; - struct tmem_handle th; - - BUG_ON(!pampd_is_remote(pampd)); - BUG_ON(is_ephemeral(pool)); - if (use_frontswap_exclusive_gets) - /* don't need local storage */ - goto out; - if (pampd_is_intransit(pampd)) { - /* - * to avoid multiple allocations (and maybe a memory leak) - * don't preallocate if already in the process of being - * repatriated - */ - *intransit = true; - goto out; - } - *intransit = false; - local_irq_save(flags); - th.client_id = pampd_remote_node(pampd); - th.pool_id = pool->pool_id; - th.oid = *oidp; - th.index = index; - ret_pampd = zcache_pampd_create(NULL, clen, true, false, &th); - if (ret_pampd != NULL) { - /* - * a pampd is marked intransit if it is remote and space has - * been allocated for it locally (note, only happens for - * persistent pages, in which case the remote copy is freed) - */ - ret_pampd = pampd_mark_intransit(ret_pampd); - c = atomic_dec_return(&ramster_remote_pers_pages); - WARN_ON_ONCE(c < 0); - } else { - inc_ramster_pers_pages_remote_nomem(); - } - local_irq_restore(flags); -out: - return ret_pampd; -} - -/* - * Called on a remote tmem_get to invoke a message to fetch the page. - * Might sleep so no tmem locks can be held. "extra" is passed - * all the way through the round-trip messaging to ramster_localify. - */ -int ramster_pampd_repatriate(void *fake_pampd, void *real_pampd, - struct tmem_pool *pool, - struct tmem_oid *oid, uint32_t index, - bool free, void *extra) -{ - struct tmem_xhandle xh; - int ret; - - if (pampd_is_intransit(real_pampd)) - /* have local space pre-reserved, so free remote copy */ - free = true; - xh = tmem_xhandle_fill(LOCAL_CLIENT, pool, oid, index); - /* unreliable request/response for now */ - ret = r2net_remote_async_get(&xh, free, - pampd_remote_node(fake_pampd), - pampd_remote_size(fake_pampd), - pampd_remote_cksum(fake_pampd), - extra); - return ret; -} - -bool ramster_pampd_is_remote(void *pampd) -{ - return pampd_is_remote(pampd); -} - -int ramster_pampd_replace_in_obj(void *new_pampd, struct tmem_obj *obj) -{ - int ret = -1; - - if (new_pampd != NULL) { - if (obj->extra == NULL) - obj->extra = new_pampd; - /* enforce that all remote pages in an object reside - * in the same node! */ - else if (pampd_remote_node(new_pampd) != - pampd_remote_node((void *)(obj->extra))) - BUG(); - ret = 0; - } - return ret; -} - -void *ramster_pampd_free(void *pampd, struct tmem_pool *pool, - struct tmem_oid *oid, uint32_t index, bool acct) -{ - bool eph = is_ephemeral(pool); - void *local_pampd = NULL; - int c; - - BUG_ON(preemptible()); - BUG_ON(!pampd_is_remote(pampd)); - WARN_ON(acct == false); - if (oid == NULL) { - /* - * a NULL oid means to ignore this pampd free - * as the remote freeing will be handled elsewhere - */ - } else if (eph) { - /* FIXME remote flush optional but probably good idea */ - } else if (pampd_is_intransit(pampd)) { - /* did a pers remote get_and_free, so just free local */ - local_pampd = pampd_mask_intransit_and_remote(pampd); - } else { - struct flushlist_node *flnode = - ramster_flnode_alloc(pool); - - flnode->xh.client_id = pampd_remote_node(pampd); - flnode->xh.pool_id = pool->pool_id; - flnode->xh.oid = *oid; - flnode->xh.index = index; - flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_PAGE; - spin_lock(&ramster_rem_op_list_lock); - list_add(&flnode->rem_op.list, &ramster_rem_op_list); - spin_unlock(&ramster_rem_op_list_lock); - c = atomic_dec_return(&ramster_remote_pers_pages); - WARN_ON_ONCE(c < 0); - } - return local_pampd; -} -EXPORT_SYMBOL_GPL(ramster_pampd_free); - -void ramster_count_foreign_pages(bool eph, int count) -{ - BUG_ON(count != 1 && count != -1); - if (eph) { - if (count > 0) { - inc_ramster_foreign_eph_pages(); - } else { - dec_ramster_foreign_eph_pages(); -#ifdef CONFIG_RAMSTER_DEBUG - WARN_ON_ONCE(ramster_foreign_eph_pages < 0); -#endif - } - } else { - if (count > 0) { - inc_ramster_foreign_pers_pages(); - } else { - dec_ramster_foreign_pers_pages(); -#ifdef CONFIG_RAMSTER_DEBUG - WARN_ON_ONCE(ramster_foreign_pers_pages < 0); -#endif - } - } -} -EXPORT_SYMBOL_GPL(ramster_count_foreign_pages); - -/* - * For now, just push over a few pages every few seconds to - * ensure that it basically works - */ -static struct workqueue_struct *ramster_remotify_workqueue; -static void ramster_remotify_process(struct work_struct *work); -static DECLARE_DELAYED_WORK(ramster_remotify_worker, - ramster_remotify_process); - -static void ramster_remotify_queue_delayed_work(unsigned long delay) -{ - if (!queue_delayed_work(ramster_remotify_workqueue, - &ramster_remotify_worker, delay)) - pr_err("ramster_remotify: bad workqueue\n"); -} - -static void ramster_remote_flush_page(struct flushlist_node *flnode) -{ - struct tmem_xhandle *xh; - int remotenode, ret; - - preempt_disable(); - xh = &flnode->xh; - remotenode = flnode->xh.client_id; - ret = r2net_remote_flush(xh, remotenode); - if (ret >= 0) - inc_ramster_remote_pages_flushed(); - else - inc_ramster_remote_page_flushes_failed(); - preempt_enable_no_resched(); - ramster_flnode_free(flnode, NULL); -} - -static void ramster_remote_flush_object(struct flushlist_node *flnode) -{ - struct tmem_xhandle *xh; - int remotenode, ret; - - preempt_disable(); - xh = &flnode->xh; - remotenode = flnode->xh.client_id; - ret = r2net_remote_flush_object(xh, remotenode); - if (ret >= 0) - inc_ramster_remote_objects_flushed(); - else - inc_ramster_remote_object_flushes_failed(); - preempt_enable_no_resched(); - ramster_flnode_free(flnode, NULL); -} - -int ramster_remotify_pageframe(bool eph) -{ - struct tmem_xhandle xh; - unsigned int size; - int remotenode, ret, zbuds; - struct tmem_pool *pool; - unsigned long flags; - unsigned char cksum; - char *p; - int i, j; - unsigned char *tmpmem[2]; - struct tmem_handle th[2]; - unsigned int zsize[2]; - - tmpmem[0] = __get_cpu_var(ramster_remoteputmem1); - tmpmem[1] = __get_cpu_var(ramster_remoteputmem2); - local_bh_disable(); - zbuds = zbud_make_zombie_lru(&th[0], &tmpmem[0], &zsize[0], eph); - /* now OK to release lock set in caller */ - local_bh_enable(); - if (zbuds == 0) - goto out; - BUG_ON(zbuds > 2); - for (i = 0; i < zbuds; i++) { - xh.client_id = th[i].client_id; - xh.pool_id = th[i].pool_id; - xh.oid = th[i].oid; - xh.index = th[i].index; - size = zsize[i]; - BUG_ON(size == 0 || size > zbud_max_buddy_size()); - for (p = tmpmem[i], cksum = 0, j = 0; j < size; j++) - cksum += *p++; - ret = r2net_remote_put(&xh, tmpmem[i], size, eph, &remotenode); - if (ret != 0) { - /* - * This is some form of a memory leak... if the remote put - * fails, there will never be another attempt to remotify - * this page. But since we've dropped the zv pointer, - * the page may have been freed or the data replaced - * so we can't just "put it back" in the remote op list. - * Even if we could, not sure where to put it in the list - * because there may be flushes that must be strictly - * ordered vs the put. So leave this as a FIXME for now. - * But count them so we know if it becomes a problem. - */ - if (eph) - inc_ramster_eph_pages_remote_failed(); - else - inc_ramster_pers_pages_remote_failed(); - break; - } else { - if (!eph) - atomic_inc(&ramster_remote_pers_pages); - } - if (eph) - inc_ramster_eph_pages_remoted(); - else - inc_ramster_pers_pages_remoted(); - /* - * data was successfully remoted so change the local version to - * point to the remote node where it landed - */ - local_bh_disable(); - pool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id); - local_irq_save(flags); - (void)tmem_replace(pool, &xh.oid, xh.index, - pampd_make_remote(remotenode, size, cksum)); - local_irq_restore(flags); - zcache_put_pool(pool); - local_bh_enable(); - } -out: - return zbuds; -} - -static void zcache_do_remotify_flushes(void) -{ - struct ramster_remotify_hdr *rem_op; - union remotify_list_node *u; - - while (1) { - spin_lock(&ramster_rem_op_list_lock); - if (list_empty(&ramster_rem_op_list)) { - spin_unlock(&ramster_rem_op_list_lock); - goto out; - } - rem_op = list_first_entry(&ramster_rem_op_list, - struct ramster_remotify_hdr, list); - list_del_init(&rem_op->list); - spin_unlock(&ramster_rem_op_list_lock); - u = (union remotify_list_node *)rem_op; - switch (rem_op->op) { - case RAMSTER_REMOTIFY_FLUSH_PAGE: - ramster_remote_flush_page((struct flushlist_node *)u); - break; - case RAMSTER_REMOTIFY_FLUSH_OBJ: - ramster_remote_flush_object((struct flushlist_node *)u); - break; - default: - BUG(); - } - } -out: - return; -} - -static void ramster_remotify_process(struct work_struct *work) -{ - static bool remotify_in_progress; - int i; - - BUG_ON(irqs_disabled()); - if (remotify_in_progress) - goto requeue; - if (ramster_remote_target_nodenum == -1) - goto requeue; - remotify_in_progress = true; - if (use_cleancache && ramster_eph_remotify_enable) { - for (i = 0; i < 100; i++) { - zcache_do_remotify_flushes(); - (void)ramster_remotify_pageframe(true); - } - } - if (use_frontswap && ramster_pers_remotify_enable) { - for (i = 0; i < 100; i++) { - zcache_do_remotify_flushes(); - (void)ramster_remotify_pageframe(false); - } - } - remotify_in_progress = false; -requeue: - ramster_remotify_queue_delayed_work(HZ); -} - -void ramster_remotify_init(void) -{ - unsigned long n = 60UL; - ramster_remotify_workqueue = - create_singlethread_workqueue("ramster_remotify"); - ramster_remotify_queue_delayed_work(n * HZ); -} - -static ssize_t ramster_manual_node_up_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - int i; - char *p = buf; - for (i = 0; i < MANUAL_NODES; i++) - if (ramster_nodes_manual_up[i]) - p += sprintf(p, "%d ", i); - p += sprintf(p, "\n"); - return p - buf; -} - -static ssize_t ramster_manual_node_up_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - int err; - unsigned long node_num; - - err = kstrtoul(buf, 10, &node_num); - if (err) { - pr_err("ramster: bad strtoul?\n"); - return -EINVAL; - } - if (node_num >= MANUAL_NODES) { - pr_err("ramster: bad node_num=%lu?\n", node_num); - return -EINVAL; - } - if (ramster_nodes_manual_up[node_num]) { - pr_err("ramster: node %d already up, ignoring\n", - (int)node_num); - } else { - ramster_nodes_manual_up[node_num] = true; - r2net_hb_node_up_manual((int)node_num); - } - return count; -} - -static struct kobj_attribute ramster_manual_node_up_attr = { - .attr = { .name = "manual_node_up", .mode = 0644 }, - .show = ramster_manual_node_up_show, - .store = ramster_manual_node_up_store, -}; - -static ssize_t ramster_remote_target_nodenum_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - if (ramster_remote_target_nodenum == -1UL) - return sprintf(buf, "unset\n"); - else - return sprintf(buf, "%d\n", ramster_remote_target_nodenum); -} - -static ssize_t ramster_remote_target_nodenum_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - int err; - unsigned long node_num; - - err = kstrtoul(buf, 10, &node_num); - if (err) { - pr_err("ramster: bad strtoul?\n"); - return -EINVAL; - } else if (node_num == -1UL) { - pr_err("ramster: disabling all remotification, " - "data may still reside on remote nodes however\n"); - return -EINVAL; - } else if (node_num >= MANUAL_NODES) { - pr_err("ramster: bad node_num=%lu?\n", node_num); - return -EINVAL; - } else if (!ramster_nodes_manual_up[node_num]) { - pr_err("ramster: node %d not up, ignoring setting " - "of remotification target\n", (int)node_num); - } else if (r2net_remote_target_node_set((int)node_num) >= 0) { - pr_info("ramster: node %d set as remotification target\n", - (int)node_num); - ramster_remote_target_nodenum = (int)node_num; - } else { - pr_err("ramster: bad num to node node_num=%d?\n", - (int)node_num); - return -EINVAL; - } - return count; -} - -static struct kobj_attribute ramster_remote_target_nodenum_attr = { - .attr = { .name = "remote_target_nodenum", .mode = 0644 }, - .show = ramster_remote_target_nodenum_show, - .store = ramster_remote_target_nodenum_store, -}; - -#define RAMSTER_SYSFS_RO(_name) \ - static ssize_t ramster_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return sprintf(buf, "%lu\n", ramster_##_name); \ - } \ - static struct kobj_attribute ramster_##_name##_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = ramster_##_name##_show, \ - } - -#define RAMSTER_SYSFS_RW(_name) \ - static ssize_t ramster_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return sprintf(buf, "%lu\n", ramster_##_name); \ - } \ - static ssize_t ramster_##_name##_store(struct kobject *kobj, \ - struct kobj_attribute *attr, const char *buf, size_t count) \ - { \ - int err; \ - unsigned long enable; \ - err = kstrtoul(buf, 10, &enable); \ - if (err) \ - return -EINVAL; \ - ramster_##_name = enable; \ - return count; \ - } \ - static struct kobj_attribute ramster_##_name##_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0644 }, \ - .show = ramster_##_name##_show, \ - .store = ramster_##_name##_store, \ - } - -#define RAMSTER_SYSFS_RO_ATOMIC(_name) \ - static ssize_t ramster_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, char *buf) \ - { \ - return sprintf(buf, "%d\n", atomic_read(&ramster_##_name)); \ - } \ - static struct kobj_attribute ramster_##_name##_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = ramster_##_name##_show, \ - } - -RAMSTER_SYSFS_RO(interface_revision); -RAMSTER_SYSFS_RO_ATOMIC(remote_pers_pages); -RAMSTER_SYSFS_RW(pers_remotify_enable); -RAMSTER_SYSFS_RW(eph_remotify_enable); - -static struct attribute *ramster_attrs[] = { - &ramster_interface_revision_attr.attr, - &ramster_remote_pers_pages_attr.attr, - &ramster_manual_node_up_attr.attr, - &ramster_remote_target_nodenum_attr.attr, - &ramster_pers_remotify_enable_attr.attr, - &ramster_eph_remotify_enable_attr.attr, - NULL, -}; - -static struct attribute_group ramster_attr_group = { - .attrs = ramster_attrs, - .name = "ramster", -}; - -/* - * frontswap selfshrinking - */ - -/* In HZ, controls frequency of worker invocation. */ -static unsigned int selfshrink_interval __read_mostly = 5; -/* Enable/disable with sysfs. */ -static bool frontswap_selfshrinking __read_mostly; - -static void selfshrink_process(struct work_struct *work); -static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process); - -#ifndef CONFIG_RAMSTER_MODULE -/* Enable/disable with kernel boot option. */ -static bool use_frontswap_selfshrink = true; -#endif - -/* - * The default values for the following parameters were deemed reasonable - * by experimentation, may be workload-dependent, and can all be - * adjusted via sysfs. - */ - -/* Control rate for frontswap shrinking. Higher hysteresis is slower. */ -static unsigned int frontswap_hysteresis __read_mostly = 20; - -/* - * Number of selfshrink worker invocations to wait before observing that - * frontswap selfshrinking should commence. Note that selfshrinking does - * not use a separate worker thread. - */ -static unsigned int frontswap_inertia __read_mostly = 3; - -/* Countdown to next invocation of frontswap_shrink() */ -static unsigned long frontswap_inertia_counter; - -/* - * Invoked by the selfshrink worker thread, uses current number of pages - * in frontswap (frontswap_curr_pages()), previous status, and control - * values (hysteresis and inertia) to determine if frontswap should be - * shrunk and what the new frontswap size should be. Note that - * frontswap_shrink is essentially a partial swapoff that immediately - * transfers pages from the "swap device" (frontswap) back into kernel - * RAM; despite the name, frontswap "shrinking" is very different from - * the "shrinker" interface used by the kernel MM subsystem to reclaim - * memory. - */ -static void frontswap_selfshrink(void) -{ - static unsigned long cur_frontswap_pages; - static unsigned long last_frontswap_pages; - static unsigned long tgt_frontswap_pages; - - last_frontswap_pages = cur_frontswap_pages; - cur_frontswap_pages = frontswap_curr_pages(); - if (!cur_frontswap_pages || - (cur_frontswap_pages > last_frontswap_pages)) { - frontswap_inertia_counter = frontswap_inertia; - return; - } - if (frontswap_inertia_counter && --frontswap_inertia_counter) - return; - if (cur_frontswap_pages <= frontswap_hysteresis) - tgt_frontswap_pages = 0; - else - tgt_frontswap_pages = cur_frontswap_pages - - (cur_frontswap_pages / frontswap_hysteresis); - frontswap_shrink(tgt_frontswap_pages); -} - -#ifndef CONFIG_RAMSTER_MODULE -static int __init ramster_nofrontswap_selfshrink_setup(char *s) -{ - use_frontswap_selfshrink = false; - return 1; -} - -__setup("noselfshrink", ramster_nofrontswap_selfshrink_setup); -#endif - -static void selfshrink_process(struct work_struct *work) -{ - if (frontswap_selfshrinking && frontswap_enabled) { - frontswap_selfshrink(); - schedule_delayed_work(&selfshrink_worker, - selfshrink_interval * HZ); - } -} - -void ramster_cpu_up(int cpu) -{ - unsigned char *p1 = kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT); - unsigned char *p2 = kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT); - BUG_ON(!p1 || !p2); - per_cpu(ramster_remoteputmem1, cpu) = p1; - per_cpu(ramster_remoteputmem2, cpu) = p2; -} -EXPORT_SYMBOL_GPL(ramster_cpu_up); - -void ramster_cpu_down(int cpu) -{ - struct ramster_preload *kp; - - kfree(per_cpu(ramster_remoteputmem1, cpu)); - per_cpu(ramster_remoteputmem1, cpu) = NULL; - kfree(per_cpu(ramster_remoteputmem2, cpu)); - per_cpu(ramster_remoteputmem2, cpu) = NULL; - kp = &per_cpu(ramster_preloads, cpu); - if (kp->flnode) { - kmem_cache_free(ramster_flnode_cache, kp->flnode); - kp->flnode = NULL; - } -} -EXPORT_SYMBOL_GPL(ramster_cpu_down); - -void ramster_register_pamops(struct tmem_pamops *pamops) -{ - pamops->free_obj = ramster_pampd_free_obj; - pamops->new_obj = ramster_pampd_new_obj; - pamops->replace_in_obj = ramster_pampd_replace_in_obj; - pamops->is_remote = ramster_pampd_is_remote; - pamops->repatriate = ramster_pampd_repatriate; - pamops->repatriate_preload = ramster_pampd_repatriate_preload; -} -EXPORT_SYMBOL_GPL(ramster_register_pamops); - -void ramster_init(bool cleancache, bool frontswap, - bool frontswap_exclusive_gets, - bool frontswap_selfshrink) -{ - int ret = 0; - - if (cleancache) - use_cleancache = true; - if (frontswap) - use_frontswap = true; - if (frontswap_exclusive_gets) - use_frontswap_exclusive_gets = true; - ramster_debugfs_init(); - ret = sysfs_create_group(mm_kobj, &ramster_attr_group); - if (ret) - pr_err("ramster: can't create sysfs for ramster\n"); - (void)r2net_register_handlers(); -#ifdef CONFIG_RAMSTER_MODULE - ret = r2nm_init(); - if (ret) - pr_err("ramster: can't init r2net\n"); - frontswap_selfshrinking = frontswap_selfshrink; -#else - frontswap_selfshrinking = use_frontswap_selfshrink; -#endif - INIT_LIST_HEAD(&ramster_rem_op_list); - ramster_flnode_cache = kmem_cache_create("ramster_flnode", - sizeof(struct flushlist_node), 0, 0, NULL); - if (frontswap_selfshrinking) { - pr_info("ramster: Initializing frontswap selfshrink driver.\n"); - schedule_delayed_work(&selfshrink_worker, - selfshrink_interval * HZ); - } - ramster_remotify_init(); -} -EXPORT_SYMBOL_GPL(ramster_init); |