aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-11 18:05:37 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-11 18:05:37 -0800
commit608ff1a210ab0e8b969399039bf8e18693605910 (patch)
treefaea7bb1764461c73d0953089bd5439d91733a03 /drivers
parentMerge tag 'usb-3.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb (diff)
parentmemory_hotplug: ensure every online node has NORMAL memory (diff)
downloadlinux-dev-608ff1a210ab0e8b969399039bf8e18693605910.tar.xz
linux-dev-608ff1a210ab0e8b969399039bf8e18693605910.zip
Merge branch 'akpm' (Andrew's patchbomb)
Merge misc updates from Andrew Morton: "About half of most of MM. Going very early this time due to uncertainty over the coreautounifiednumasched things. I'll send the other half of most of MM tomorrow. The rest of MM awaits a slab merge from Pekka." * emailed patches from Andrew Morton: (71 commits) memory_hotplug: ensure every online node has NORMAL memory memory_hotplug: handle empty zone when online_movable/online_kernel mm, memory-hotplug: dynamic configure movable memory and portion memory drivers/base/node.c: cleanup node_state_attr[] bootmem: fix wrong call parameter for free_bootmem() avr32, kconfig: remove HAVE_ARCH_BOOTMEM mm: cma: remove watermark hacks mm: cma: skip watermarks check for already isolated blocks in split_free_page() mm, oom: fix race when specifying a thread as the oom origin mm, oom: change type of oom_score_adj to short mm: cleanup register_node() mm, mempolicy: remove duplicate code mm/vmscan.c: try_to_freeze() returns boolean mm: introduce putback_movable_pages() virtio_balloon: introduce migration primitives to balloon pages mm: introduce compaction and migration for ballooned pages mm: introduce a common interface for balloon pages mobility mm: redefine address_space.assoc_mapping mm: adjust address_space_operations.migratepage() return code arch/sparc/kernel/sys_sparc_64.c: s/COLOUR/COLOR/ ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/base/memory.c42
-rw-r--r--drivers/base/node.c78
-rw-r--r--drivers/macintosh/smu.c2
-rw-r--r--drivers/staging/android/lowmemorykiller.c16
-rw-r--r--drivers/virtio/virtio_balloon.c151
5 files changed, 222 insertions, 67 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 86c88216a503..987604d56c83 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -70,6 +70,13 @@ void unregister_memory_isolate_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_memory_isolate_notifier);
+static void memory_block_release(struct device *dev)
+{
+ struct memory_block *mem = container_of(dev, struct memory_block, dev);
+
+ kfree(mem);
+}
+
/*
* register_memory - Setup a sysfs device for a memory block
*/
@@ -80,6 +87,7 @@ int register_memory(struct memory_block *memory)
memory->dev.bus = &memory_subsys;
memory->dev.id = memory->start_section_nr / sections_per_block;
+ memory->dev.release = memory_block_release;
error = device_register(&memory->dev);
return error;
@@ -246,7 +254,7 @@ static bool pages_correctly_reserved(unsigned long start_pfn,
* OK to have direct references to sparsemem variables in here.
*/
static int
-memory_block_action(unsigned long phys_index, unsigned long action)
+memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
{
unsigned long start_pfn;
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
@@ -261,7 +269,7 @@ memory_block_action(unsigned long phys_index, unsigned long action)
if (!pages_correctly_reserved(start_pfn, nr_pages))
return -EBUSY;
- ret = online_pages(start_pfn, nr_pages);
+ ret = online_pages(start_pfn, nr_pages, online_type);
break;
case MEM_OFFLINE:
ret = offline_pages(start_pfn, nr_pages);
@@ -276,7 +284,8 @@ memory_block_action(unsigned long phys_index, unsigned long action)
}
static int __memory_block_change_state(struct memory_block *mem,
- unsigned long to_state, unsigned long from_state_req)
+ unsigned long to_state, unsigned long from_state_req,
+ int online_type)
{
int ret = 0;
@@ -288,7 +297,7 @@ static int __memory_block_change_state(struct memory_block *mem,
if (to_state == MEM_OFFLINE)
mem->state = MEM_GOING_OFFLINE;
- ret = memory_block_action(mem->start_section_nr, to_state);
+ ret = memory_block_action(mem->start_section_nr, to_state, online_type);
if (ret) {
mem->state = from_state_req;
@@ -311,12 +320,14 @@ out:
}
static int memory_block_change_state(struct memory_block *mem,
- unsigned long to_state, unsigned long from_state_req)
+ unsigned long to_state, unsigned long from_state_req,
+ int online_type)
{
int ret;
mutex_lock(&mem->state_mutex);
- ret = __memory_block_change_state(mem, to_state, from_state_req);
+ ret = __memory_block_change_state(mem, to_state, from_state_req,
+ online_type);
mutex_unlock(&mem->state_mutex);
return ret;
@@ -330,10 +341,18 @@ store_mem_state(struct device *dev,
mem = container_of(dev, struct memory_block, dev);
- if (!strncmp(buf, "online", min((int)count, 6)))
- ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
- else if(!strncmp(buf, "offline", min((int)count, 7)))
- ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+ if (!strncmp(buf, "online_kernel", min_t(int, count, 13)))
+ ret = memory_block_change_state(mem, MEM_ONLINE,
+ MEM_OFFLINE, ONLINE_KERNEL);
+ else if (!strncmp(buf, "online_movable", min_t(int, count, 14)))
+ ret = memory_block_change_state(mem, MEM_ONLINE,
+ MEM_OFFLINE, ONLINE_MOVABLE);
+ else if (!strncmp(buf, "online", min_t(int, count, 6)))
+ ret = memory_block_change_state(mem, MEM_ONLINE,
+ MEM_OFFLINE, ONLINE_KEEP);
+ else if(!strncmp(buf, "offline", min_t(int, count, 7)))
+ ret = memory_block_change_state(mem, MEM_OFFLINE,
+ MEM_ONLINE, -1);
if (ret)
return ret;
@@ -635,7 +654,6 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
mem_remove_simple_file(mem, phys_device);
mem_remove_simple_file(mem, removable);
unregister_memory(mem);
- kfree(mem);
} else
kobject_put(&mem->dev.kobj);
@@ -669,7 +687,7 @@ int offline_memory_block(struct memory_block *mem)
mutex_lock(&mem->state_mutex);
if (mem->state != MEM_OFFLINE)
- ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
+ ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE, -1);
mutex_unlock(&mem->state_mutex);
return ret;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index af1a177216f1..294e31626210 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -252,6 +252,24 @@ static inline void hugetlb_register_node(struct node *node) {}
static inline void hugetlb_unregister_node(struct node *node) {}
#endif
+static void node_device_release(struct device *dev)
+{
+ struct node *node = to_node(dev);
+
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
+ /*
+ * We schedule the work only when a memory section is
+ * onlined/offlined on this node. When we come here,
+ * all the memory on this node has been offlined,
+ * so we won't enqueue new work to this work.
+ *
+ * The work is using node->node_work, so we should
+ * flush work before freeing the memory.
+ */
+ flush_work(&node->node_work);
+#endif
+ kfree(node);
+}
/*
* register_node - Setup a sysfs device for a node.
@@ -259,12 +277,13 @@ static inline void hugetlb_unregister_node(struct node *node) {}
*
* Initialize and register the node device.
*/
-int register_node(struct node *node, int num, struct node *parent)
+static int register_node(struct node *node, int num, struct node *parent)
{
int error;
node->dev.id = num;
node->dev.bus = &node_subsys;
+ node->dev.release = node_device_release;
error = device_register(&node->dev);
if (!error){
@@ -306,7 +325,7 @@ void unregister_node(struct node *node)
device_unregister(&node->dev);
}
-struct node node_devices[MAX_NUMNODES];
+struct node *node_devices[MAX_NUMNODES];
/*
* register cpu under node
@@ -323,15 +342,15 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
if (!obj)
return 0;
- ret = sysfs_create_link(&node_devices[nid].dev.kobj,
+ ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
&obj->kobj,
kobject_name(&obj->kobj));
if (ret)
return ret;
return sysfs_create_link(&obj->kobj,
- &node_devices[nid].dev.kobj,
- kobject_name(&node_devices[nid].dev.kobj));
+ &node_devices[nid]->dev.kobj,
+ kobject_name(&node_devices[nid]->dev.kobj));
}
int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
@@ -345,10 +364,10 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
if (!obj)
return 0;
- sysfs_remove_link(&node_devices[nid].dev.kobj,
+ sysfs_remove_link(&node_devices[nid]->dev.kobj,
kobject_name(&obj->kobj));
sysfs_remove_link(&obj->kobj,
- kobject_name(&node_devices[nid].dev.kobj));
+ kobject_name(&node_devices[nid]->dev.kobj));
return 0;
}
@@ -390,15 +409,15 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
continue;
if (page_nid != nid)
continue;
- ret = sysfs_create_link_nowarn(&node_devices[nid].dev.kobj,
+ ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
&mem_blk->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
if (ret)
return ret;
return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
- &node_devices[nid].dev.kobj,
- kobject_name(&node_devices[nid].dev.kobj));
+ &node_devices[nid]->dev.kobj,
+ kobject_name(&node_devices[nid]->dev.kobj));
}
/* mem section does not span the specified node */
return 0;
@@ -431,10 +450,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
continue;
if (node_test_and_set(nid, *unlinked_nodes))
continue;
- sysfs_remove_link(&node_devices[nid].dev.kobj,
+ sysfs_remove_link(&node_devices[nid]->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
sysfs_remove_link(&mem_blk->dev.kobj,
- kobject_name(&node_devices[nid].dev.kobj));
+ kobject_name(&node_devices[nid]->dev.kobj));
}
NODEMASK_FREE(unlinked_nodes);
return 0;
@@ -500,7 +519,7 @@ static void node_hugetlb_work(struct work_struct *work)
static void init_node_hugetlb_work(int nid)
{
- INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work);
+ INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
}
static int node_memory_callback(struct notifier_block *self,
@@ -517,7 +536,7 @@ static int node_memory_callback(struct notifier_block *self,
* when transitioning to/from memoryless state.
*/
if (nid != NUMA_NO_NODE)
- schedule_work(&node_devices[nid].node_work);
+ schedule_work(&node_devices[nid]->node_work);
break;
case MEM_GOING_ONLINE:
@@ -558,9 +577,13 @@ int register_one_node(int nid)
struct node *parent = NULL;
if (p_node != nid)
- parent = &node_devices[p_node];
+ parent = node_devices[p_node];
+
+ node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
+ if (!node_devices[nid])
+ return -ENOMEM;
- error = register_node(&node_devices[nid], nid, parent);
+ error = register_node(node_devices[nid], nid, parent);
/* link cpu under this node */
for_each_present_cpu(cpu) {
@@ -581,7 +604,8 @@ int register_one_node(int nid)
void unregister_one_node(int nid)
{
- unregister_node(&node_devices[nid]);
+ unregister_node(node_devices[nid]);
+ node_devices[nid] = NULL;
}
/*
@@ -614,23 +638,23 @@ static ssize_t show_node_state(struct device *dev,
{ __ATTR(name, 0444, show_node_state, NULL), state }
static struct node_attr node_state_attr[] = {
- _NODE_ATTR(possible, N_POSSIBLE),
- _NODE_ATTR(online, N_ONLINE),
- _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
- _NODE_ATTR(has_cpu, N_CPU),
+ [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
+ [N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
+ [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
#ifdef CONFIG_HIGHMEM
- _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
+ [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
#endif
+ [N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
};
static struct attribute *node_state_attrs[] = {
- &node_state_attr[0].attr.attr,
- &node_state_attr[1].attr.attr,
- &node_state_attr[2].attr.attr,
- &node_state_attr[3].attr.attr,
+ &node_state_attr[N_POSSIBLE].attr.attr,
+ &node_state_attr[N_ONLINE].attr.attr,
+ &node_state_attr[N_NORMAL_MEMORY].attr.attr,
#ifdef CONFIG_HIGHMEM
- &node_state_attr[4].attr.attr,
+ &node_state_attr[N_HIGH_MEMORY].attr.attr,
#endif
+ &node_state_attr[N_CPU].attr.attr,
NULL
};
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 7d5a6b40b31c..196368009001 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -565,7 +565,7 @@ fail_msg_node:
fail_db_node:
of_node_put(smu->db_node);
fail_bootmem:
- free_bootmem((unsigned long)smu, sizeof(struct smu_device));
+ free_bootmem(__pa(smu), sizeof(struct smu_device));
smu = NULL;
fail_np:
of_node_put(np);
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index b91e4bc332a7..3b91b0fd4de3 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -40,7 +40,7 @@
#include <linux/notifier.h>
static uint32_t lowmem_debug_level = 2;
-static int lowmem_adj[6] = {
+static short lowmem_adj[6] = {
0,
1,
6,
@@ -70,9 +70,9 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
int rem = 0;
int tasksize;
int i;
- int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
+ short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
int selected_tasksize = 0;
- int selected_oom_score_adj;
+ short selected_oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
int other_free = global_page_state(NR_FREE_PAGES);
int other_file = global_page_state(NR_FILE_PAGES) -
@@ -90,7 +90,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
}
}
if (sc->nr_to_scan > 0)
- lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
+ lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n",
sc->nr_to_scan, sc->gfp_mask, other_free,
other_file, min_score_adj);
rem = global_page_state(NR_ACTIVE_ANON) +
@@ -107,7 +107,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
rcu_read_lock();
for_each_process(tsk) {
struct task_struct *p;
- int oom_score_adj;
+ short oom_score_adj;
if (tsk->flags & PF_KTHREAD)
continue;
@@ -141,11 +141,11 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
selected = p;
selected_tasksize = tasksize;
selected_oom_score_adj = oom_score_adj;
- lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
+ lowmem_print(2, "select %d (%s), adj %hd, size %d, to kill\n",
p->pid, p->comm, oom_score_adj, tasksize);
}
if (selected) {
- lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
+ lowmem_print(1, "send sigkill to %d (%s), adj %hd, size %d\n",
selected->pid, selected->comm,
selected_oom_score_adj, selected_tasksize);
lowmem_deathpending_timeout = jiffies + HZ;
@@ -176,7 +176,7 @@ static void __exit lowmem_exit(void)
}
module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
-module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size,
+module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
S_IRUGO | S_IWUSR);
module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
S_IRUGO | S_IWUSR);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0908e6044333..2a70558b36ea 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -27,13 +27,15 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/balloon_compaction.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
* multiple balloon pages. All memory counters in this driver are in balloon
* page units.
*/
-#define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
+#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
+#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
struct virtio_balloon
{
@@ -52,15 +54,19 @@ struct virtio_balloon
/* Number of balloon pages we've told the Host we're not using. */
unsigned int num_pages;
/*
- * The pages we've told the Host we're not using.
+ * The pages we've told the Host we're not using are enqueued
+ * at vb_dev_info->pages list.
* Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
* to num_pages above.
*/
- struct list_head pages;
+ struct balloon_dev_info *vb_dev_info;
+
+ /* Synchronize access/update to this struct virtio_balloon elements */
+ struct mutex balloon_lock;
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
- u32 pfns[256];
+ u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
/* Memory statistics */
int need_stats_update;
@@ -122,18 +128,21 @@ static void set_page_pfns(u32 pfns[], struct page *page)
static void fill_balloon(struct virtio_balloon *vb, size_t num)
{
+ struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
+
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));
+ mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY |
- __GFP_NOMEMALLOC | __GFP_NOWARN);
+ struct page *page = balloon_page_enqueue(vb_dev_info);
+
if (!page) {
if (printk_ratelimit())
dev_printk(KERN_INFO, &vb->vdev->dev,
- "Out of puff! Can't get %zu pages\n",
- num);
+ "Out of puff! Can't get %u pages\n",
+ VIRTIO_BALLOON_PAGES_PER_PAGE);
/* Sleep for at least 1/5 of a second before retry. */
msleep(200);
break;
@@ -141,14 +150,12 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
set_page_pfns(vb->pfns + vb->num_pfns, page);
vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
totalram_pages--;
- list_add(&page->lru, &vb->pages);
}
- /* Didn't get any? Oh well. */
- if (vb->num_pfns == 0)
- return;
-
- tell_host(vb, vb->inflate_vq);
+ /* Did we get any? */
+ if (vb->num_pfns != 0)
+ tell_host(vb, vb->inflate_vq);
+ mutex_unlock(&vb->balloon_lock);
}
static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
@@ -157,7 +164,7 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
/* Find pfns pointing at start of each page, get pages and free them. */
for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- __free_page(balloon_pfn_to_page(pfns[i]));
+ balloon_page_free(balloon_pfn_to_page(pfns[i]));
totalram_pages++;
}
}
@@ -165,14 +172,17 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
static void leak_balloon(struct virtio_balloon *vb, size_t num)
{
struct page *page;
+ struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));
+ mutex_lock(&vb->balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- page = list_first_entry(&vb->pages, struct page, lru);
- list_del(&page->lru);
+ page = balloon_page_dequeue(vb_dev_info);
+ if (!page)
+ break;
set_page_pfns(vb->pfns + vb->num_pfns, page);
vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
}
@@ -183,6 +193,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
* is true, we *have* to do it in this order
*/
tell_host(vb, vb->deflate_vq);
+ mutex_unlock(&vb->balloon_lock);
release_pages_by_pfn(vb->pfns, vb->num_pfns);
}
@@ -339,9 +350,84 @@ static int init_vqs(struct virtio_balloon *vb)
return 0;
}
+static const struct address_space_operations virtio_balloon_aops;
+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * virtballoon_migratepage - perform the balloon page migration on behalf of
+ * a compation thread. (called under page lock)
+ * @mapping: the page->mapping which will be assigned to the new migrated page.
+ * @newpage: page that will replace the isolated page after migration finishes.
+ * @page : the isolated (old) page that is about to be migrated to newpage.
+ * @mode : compaction mode -- not used for balloon page migration.
+ *
+ * After a ballooned page gets isolated by compaction procedures, this is the
+ * function that performs the page migration on behalf of a compaction thread
+ * The page migration for virtio balloon is done in a simple swap fashion which
+ * follows these two macro steps:
+ * 1) insert newpage into vb->pages list and update the host about it;
+ * 2) update the host about the old page removed from vb->pages list;
+ *
+ * This function preforms the balloon page migration task.
+ * Called through balloon_mapping->a_ops->migratepage
+ */
+int virtballoon_migratepage(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ struct balloon_dev_info *vb_dev_info = balloon_page_device(page);
+ struct virtio_balloon *vb;
+ unsigned long flags;
+
+ BUG_ON(!vb_dev_info);
+
+ vb = vb_dev_info->balloon_device;
+
+ /*
+ * In order to avoid lock contention while migrating pages concurrently
+ * to leak_balloon() or fill_balloon() we just give up the balloon_lock
+ * this turn, as it is easier to retry the page migration later.
+ * This also prevents fill_balloon() getting stuck into a mutex
+ * recursion in the case it ends up triggering memory compaction
+ * while it is attempting to inflate the ballon.
+ */
+ if (!mutex_trylock(&vb->balloon_lock))
+ return -EAGAIN;
+
+ /* balloon's page migration 1st step -- inflate "newpage" */
+ spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
+ balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
+ vb_dev_info->isolated_pages--;
+ spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+ set_page_pfns(vb->pfns, newpage);
+ tell_host(vb, vb->inflate_vq);
+
+ /*
+ * balloon's page migration 2nd step -- deflate "page"
+ *
+ * It's safe to delete page->lru here because this page is at
+ * an isolated migration list, and this step is expected to happen here
+ */
+ balloon_page_delete(page);
+ vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
+ set_page_pfns(vb->pfns, page);
+ tell_host(vb, vb->deflate_vq);
+
+ mutex_unlock(&vb->balloon_lock);
+
+ return MIGRATEPAGE_BALLOON_SUCCESS;
+}
+
+/* define the balloon_mapping->a_ops callback to allow balloon page migration */
+static const struct address_space_operations virtio_balloon_aops = {
+ .migratepage = virtballoon_migratepage,
+};
+#endif /* CONFIG_BALLOON_COMPACTION */
+
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
+ struct address_space *vb_mapping;
+ struct balloon_dev_info *vb_devinfo;
int err;
vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
@@ -350,16 +436,37 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out;
}
- INIT_LIST_HEAD(&vb->pages);
vb->num_pages = 0;
+ mutex_init(&vb->balloon_lock);
init_waitqueue_head(&vb->config_change);
init_waitqueue_head(&vb->acked);
vb->vdev = vdev;
vb->need_stats_update = 0;
+ vb_devinfo = balloon_devinfo_alloc(vb);
+ if (IS_ERR(vb_devinfo)) {
+ err = PTR_ERR(vb_devinfo);
+ goto out_free_vb;
+ }
+
+ vb_mapping = balloon_mapping_alloc(vb_devinfo,
+ (balloon_compaction_check()) ?
+ &virtio_balloon_aops : NULL);
+ if (IS_ERR(vb_mapping)) {
+ /*
+ * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP
+ * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off.
+ */
+ err = PTR_ERR(vb_mapping);
+ if (err != -EOPNOTSUPP)
+ goto out_free_vb_devinfo;
+ }
+
+ vb->vb_dev_info = vb_devinfo;
+
err = init_vqs(vb);
if (err)
- goto out_free_vb;
+ goto out_free_vb_mapping;
vb->thread = kthread_run(balloon, vb, "vballoon");
if (IS_ERR(vb->thread)) {
@@ -371,6 +478,10 @@ static int virtballoon_probe(struct virtio_device *vdev)
out_del_vqs:
vdev->config->del_vqs(vdev);
+out_free_vb_mapping:
+ balloon_mapping_free(vb_mapping);
+out_free_vb_devinfo:
+ balloon_devinfo_free(vb_devinfo);
out_free_vb:
kfree(vb);
out:
@@ -396,6 +507,8 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
kthread_stop(vb->thread);
remove_common(vb);
+ balloon_mapping_free(vb->vb_dev_info->mapping);
+ balloon_devinfo_free(vb->vb_dev_info);
kfree(vb);
}