aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/power
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/power')
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/power/disk.c66
-rw-r--r--kernel/power/main.c14
-rw-r--r--kernel/power/power.h32
-rw-r--r--kernel/power/poweroff.c4
-rw-r--r--kernel/power/process.c130
-rw-r--r--kernel/power/snapshot.c860
-rw-r--r--kernel/power/swap.c347
-rw-r--r--kernel/power/swsusp.c98
-rw-r--r--kernel/power/user.c102
10 files changed, 1141 insertions, 514 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 825068ca3479..710ed084e7c5 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -78,7 +78,7 @@ config PM_SYSFS_DEPRECATED
config SOFTWARE_SUSPEND
bool "Software Suspend"
- depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP))
+ depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
---help---
Enable the possibility of suspending the machine.
It doesn't need ACPI or APM.
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b1fb7866b0b3..0b00f56c2ad0 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -20,6 +20,7 @@
#include <linux/pm.h>
#include <linux/console.h>
#include <linux/cpu.h>
+#include <linux/freezer.h>
#include "power.h"
@@ -27,6 +28,23 @@
static int noresume = 0;
char resume_file[256] = CONFIG_PM_STD_PARTITION;
dev_t swsusp_resume_device;
+sector_t swsusp_resume_block;
+
+/**
+ * platform_prepare - prepare the machine for hibernation using the
+ * platform driver if so configured and return an error code if it fails
+ */
+
+static inline int platform_prepare(void)
+{
+ int error = 0;
+
+ if (pm_disk_mode == PM_DISK_PLATFORM) {
+ if (pm_ops && pm_ops->prepare)
+ error = pm_ops->prepare(PM_SUSPEND_DISK);
+ }
+ return error;
+}
/**
* power_down - Shut machine down for hibernate.
@@ -40,12 +58,10 @@ dev_t swsusp_resume_device;
static void power_down(suspend_disk_method_t mode)
{
- int error = 0;
-
switch(mode) {
case PM_DISK_PLATFORM:
kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
- error = pm_ops->enter(PM_SUSPEND_DISK);
+ pm_ops->enter(PM_SUSPEND_DISK);
break;
case PM_DISK_SHUTDOWN:
kernel_power_off();
@@ -90,12 +106,18 @@ static int prepare_processes(void)
goto thaw;
}
+ error = platform_prepare();
+ if (error)
+ goto thaw;
+
/* Free memory before shutting down devices. */
if (!(error = swsusp_shrink_memory()))
return 0;
-thaw:
+
+ platform_finish();
+ thaw:
thaw_processes();
-enable_cpus:
+ enable_cpus:
enable_nonboot_cpus();
pm_restore_console();
return error;
@@ -127,7 +149,7 @@ int pm_suspend_disk(void)
return error;
if (pm_disk_mode == PM_DISK_TESTPROC)
- goto Thaw;
+ return 0;
suspend_console();
error = device_suspend(PMSG_FREEZE);
@@ -189,10 +211,10 @@ static int software_resume(void)
{
int error;
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
if (!swsusp_resume_device) {
if (!strlen(resume_file)) {
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
return -ENOENT;
}
swsusp_resume_device = name_to_dev_t(resume_file);
@@ -207,7 +229,7 @@ static int software_resume(void)
* FIXME: If noresume is specified, we need to find the partition
* and reset it back to normal swap space.
*/
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
return 0;
}
@@ -251,7 +273,7 @@ static int software_resume(void)
unprepare_processes();
Done:
/* For success case, the suspend path will release the lock */
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
pr_debug("PM: Resume from disk failed.\n");
return 0;
}
@@ -312,7 +334,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) {
if (!strncmp(buf, pm_disk_modes[i], len)) {
mode = i;
@@ -336,7 +358,7 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
pr_debug("PM: suspend-to-disk mode set to '%s'\n",
pm_disk_modes[mode]);
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
return error ? error : n;
}
@@ -361,14 +383,14 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n)
if (maj != MAJOR(res) || min != MINOR(res))
goto out;
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
swsusp_resume_device = res;
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
printk("Attempting manual resume\n");
noresume = 0;
software_resume();
ret = n;
-out:
+ out:
return ret;
}
@@ -423,6 +445,19 @@ static int __init resume_setup(char *str)
return 1;
}
+static int __init resume_offset_setup(char *str)
+{
+ unsigned long long offset;
+
+ if (noresume)
+ return 1;
+
+ if (sscanf(str, "%llu", &offset) == 1)
+ swsusp_resume_block = offset;
+
+ return 1;
+}
+
static int __init noresume_setup(char *str)
{
noresume = 1;
@@ -430,4 +465,5 @@ static int __init noresume_setup(char *str)
}
__setup("noresume", noresume_setup);
+__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 873228c71dab..500eb87f643d 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -8,6 +8,7 @@
*
*/
+#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/kobject.h>
#include <linux/string.h>
@@ -18,13 +19,14 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/resume-trace.h>
+#include <linux/freezer.h>
#include "power.h"
/*This is just an arbitrary number */
#define FREE_PAGE_NUMBER (100)
-DECLARE_MUTEX(pm_sem);
+DEFINE_MUTEX(pm_mutex);
struct pm_ops *pm_ops;
suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
@@ -36,9 +38,9 @@ suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
void pm_set_ops(struct pm_ops * ops)
{
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
pm_ops = ops;
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
}
@@ -182,7 +184,7 @@ static int enter_state(suspend_state_t state)
if (!valid_state(state))
return -ENODEV;
- if (down_trylock(&pm_sem))
+ if (!mutex_trylock(&pm_mutex))
return -EBUSY;
if (state == PM_SUSPEND_DISK) {
@@ -200,7 +202,7 @@ static int enter_state(suspend_state_t state)
pr_debug("PM: Finishing wakeup.\n");
suspend_finish(state);
Unlock:
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
return error;
}
@@ -229,7 +231,7 @@ int pm_suspend(suspend_state_t state)
return -EINVAL;
}
-
+EXPORT_SYMBOL(pm_suspend);
decl_subsys(power,NULL,NULL);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index bfe999f7b272..eb461b816bf4 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -22,7 +22,9 @@ static inline int pm_suspend_disk(void)
return -EPERM;
}
#endif
-extern struct semaphore pm_sem;
+
+extern struct mutex pm_mutex;
+
#define power_attr(_name) \
static struct subsys_attribute _name##_attr = { \
.attr = { \
@@ -42,6 +44,7 @@ extern const void __nosave_begin, __nosave_end;
extern unsigned long image_size;
extern int in_suspend;
extern dev_t swsusp_resume_device;
+extern sector_t swsusp_resume_block;
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
@@ -102,8 +105,18 @@ struct snapshot_handle {
extern unsigned int snapshot_additional_pages(struct zone *zone);
extern int snapshot_read_next(struct snapshot_handle *handle, size_t count);
extern int snapshot_write_next(struct snapshot_handle *handle, size_t count);
+extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
-extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
+
+/*
+ * This structure is used to pass the values needed for the identification
+ * of the resume swap area from a user space to the kernel via the
+ * SNAPSHOT_SET_SWAP_AREA ioctl
+ */
+struct resume_swap_area {
+ loff_t offset;
+ u_int32_t dev;
+} __attribute__((packed));
#define SNAPSHOT_IOC_MAGIC '3'
#define SNAPSHOT_FREEZE _IO(SNAPSHOT_IOC_MAGIC, 1)
@@ -117,7 +130,14 @@ extern void snapshot_free_unused_memory(struct snapshot_handle *handle);
#define SNAPSHOT_FREE_SWAP_PAGES _IO(SNAPSHOT_IOC_MAGIC, 9)
#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
#define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11)
-#define SNAPSHOT_IOC_MAXNR 11
+#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
+#define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \
+ struct resume_swap_area)
+#define SNAPSHOT_IOC_MAXNR 13
+
+#define PMOPS_PREPARE 1
+#define PMOPS_ENTER 2
+#define PMOPS_FINISH 3
/**
* The bitmap is used for tracing allocated swap pages
@@ -141,7 +161,7 @@ struct bitmap_page {
extern void free_bitmap(struct bitmap_page *bitmap);
extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits);
-extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap);
+extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap);
extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap);
extern int swsusp_check(void);
@@ -153,3 +173,7 @@ extern int swsusp_read(void);
extern int swsusp_write(void);
extern void swsusp_close(void);
extern int suspend_enter(suspend_state_t state);
+
+struct timeval;
+extern void swsusp_show_speed(struct timeval *, struct timeval *,
+ unsigned int, char *);
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index f1f900ac3164..678ec736076b 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -16,12 +16,12 @@
* callback we use.
*/
-static void do_poweroff(void *dummy)
+static void do_poweroff(struct work_struct *dummy)
{
kernel_power_off();
}
-static DECLARE_WORK(poweroff_work, do_poweroff, NULL);
+static DECLARE_WORK(poweroff_work, do_poweroff);
static void handle_poweroff(int key, struct tty_struct *tty)
{
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 72e72d2c61e6..99eeb119b06d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -13,12 +13,15 @@
#include <linux/suspend.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/freezer.h>
/*
* Timeout for stopping processes
*/
#define TIMEOUT (20 * HZ)
+#define FREEZER_KERNEL_THREADS 0
+#define FREEZER_USER_SPACE 1
static inline int freezeable(struct task_struct * p)
{
@@ -39,7 +42,6 @@ void refrigerator(void)
long save;
save = current->state;
pr_debug("%s entered refrigerator\n", current->comm);
- printk("=");
frozen_process(current);
spin_lock_irq(&current->sighand->siglock);
@@ -79,96 +81,136 @@ static void cancel_freezing(struct task_struct *p)
}
}
-/* 0 = success, else # of processes that we failed to stop */
-int freeze_processes(void)
+static inline int is_user_space(struct task_struct *p)
+{
+ return p->mm && !(p->flags & PF_BORROWED_MM);
+}
+
+static unsigned int try_to_freeze_tasks(int freeze_user_space)
{
- int todo, nr_user, user_frozen;
- unsigned long start_time;
struct task_struct *g, *p;
+ unsigned long end_time;
+ unsigned int todo;
- printk( "Stopping tasks: " );
- start_time = jiffies;
- user_frozen = 0;
+ end_time = jiffies + TIMEOUT;
do {
- nr_user = todo = 0;
+ todo = 0;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if (!freezeable(p))
continue;
+
if (frozen(p))
continue;
- if (p->state == TASK_TRACED && frozen(p->parent)) {
+
+ if (p->state == TASK_TRACED &&
+ (frozen(p->parent) ||
+ p->parent->state == TASK_STOPPED)) {
cancel_freezing(p);
continue;
}
- if (p->mm && !(p->flags & PF_BORROWED_MM)) {
- /* The task is a user-space one.
- * Freeze it unless there's a vfork completion
- * pending
+ if (is_user_space(p)) {
+ if (!freeze_user_space)
+ continue;
+
+ /* Freeze the task unless there is a vfork
+ * completion pending
*/
if (!p->vfork_done)
freeze_process(p);
- nr_user++;
} else {
- /* Freeze only if the user space is frozen */
- if (user_frozen)
- freeze_process(p);
- todo++;
+ if (freeze_user_space)
+ continue;
+
+ freeze_process(p);
}
+ todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
- todo += nr_user;
- if (!user_frozen && !nr_user) {
- sys_sync();
- start_time = jiffies;
- }
- user_frozen = !nr_user;
yield(); /* Yield is okay here */
- if (todo && time_after(jiffies, start_time + TIMEOUT))
+ if (todo && time_after(jiffies, end_time))
break;
- } while(todo);
+ } while (todo);
- /* This does not unfreeze processes that are already frozen
- * (we have slightly ugly calling convention in that respect,
- * and caller must call thaw_processes() if something fails),
- * but it cleans up leftover PF_FREEZE requests.
- */
if (todo) {
- printk( "\n" );
- printk(KERN_ERR " stopping tasks timed out "
- "after %d seconds (%d tasks remaining):\n",
- TIMEOUT / HZ, todo);
+ /* This does not unfreeze processes that are already frozen
+ * (we have slightly ugly calling convention in that respect,
+ * and caller must call thaw_processes() if something fails),
+ * but it cleans up leftover PF_FREEZE requests.
+ */
+ printk("\n");
+ printk(KERN_ERR "Stopping %s timed out after %d seconds "
+ "(%d tasks refusing to freeze):\n",
+ freeze_user_space ? "user space processes" :
+ "kernel threads",
+ TIMEOUT / HZ, todo);
read_lock(&tasklist_lock);
do_each_thread(g, p) {
+ if (is_user_space(p) == !freeze_user_space)
+ continue;
+
if (freezeable(p) && !frozen(p))
- printk(KERN_ERR " %s\n", p->comm);
+ printk(KERN_ERR " %s\n", p->comm);
+
cancel_freezing(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
- return todo;
}
- printk( "|\n" );
+ return todo;
+}
+
+/**
+ * freeze_processes - tell processes to enter the refrigerator
+ *
+ * Returns 0 on success, or the number of processes that didn't freeze,
+ * although they were told to.
+ */
+int freeze_processes(void)
+{
+ unsigned int nr_unfrozen;
+
+ printk("Stopping tasks ... ");
+ nr_unfrozen = try_to_freeze_tasks(FREEZER_USER_SPACE);
+ if (nr_unfrozen)
+ return nr_unfrozen;
+
+ sys_sync();
+ nr_unfrozen = try_to_freeze_tasks(FREEZER_KERNEL_THREADS);
+ if (nr_unfrozen)
+ return nr_unfrozen;
+
+ printk("done.\n");
BUG_ON(in_atomic());
return 0;
}
-void thaw_processes(void)
+static void thaw_tasks(int thaw_user_space)
{
struct task_struct *g, *p;
- printk( "Restarting tasks..." );
read_lock(&tasklist_lock);
do_each_thread(g, p) {
if (!freezeable(p))
continue;
+
+ if (is_user_space(p) == !thaw_user_space)
+ continue;
+
if (!thaw_process(p))
- printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
+ printk(KERN_WARNING " Strange, %s not stopped\n",
+ p->comm );
} while_each_thread(g, p);
-
read_unlock(&tasklist_lock);
+}
+
+void thaw_processes(void)
+{
+ printk("Restarting tasks ... ");
+ thaw_tasks(FREEZER_KERNEL_THREADS);
+ thaw_tasks(FREEZER_USER_SPACE);
schedule();
- printk( " done\n" );
+ printk("done.\n");
}
EXPORT_SYMBOL(refrigerator);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 99f9b7d177d6..c024606221c4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1,15 +1,15 @@
/*
* linux/kernel/power/snapshot.c
*
- * This file provide system snapshot/restore functionality.
+ * This file provides system snapshot/restore functionality for swsusp.
*
* Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*
- * This file is released under the GPLv2, and is based on swsusp.c.
+ * This file is released under the GPLv2.
*
*/
-
#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -34,137 +34,24 @@
#include "power.h"
-/* List of PBEs used for creating and restoring the suspend image */
+/* List of PBEs needed for restoring the pages that were allocated before
+ * the suspend and included in the suspend image, but have also been
+ * allocated by the "resume" kernel, so their contents cannot be written
+ * directly to their "original" page frames.
+ */
struct pbe *restore_pblist;
-static unsigned int nr_copy_pages;
-static unsigned int nr_meta_pages;
+/* Pointer to an auxiliary buffer (1 page) */
static void *buffer;
-#ifdef CONFIG_HIGHMEM
-unsigned int count_highmem_pages(void)
-{
- struct zone *zone;
- unsigned long zone_pfn;
- unsigned int n = 0;
-
- for_each_zone (zone)
- if (is_highmem(zone)) {
- mark_free_pages(zone);
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
- struct page *page;
- unsigned long pfn = zone_pfn + zone->zone_start_pfn;
- if (!pfn_valid(pfn))
- continue;
- page = pfn_to_page(pfn);
- if (PageReserved(page))
- continue;
- if (PageNosaveFree(page))
- continue;
- n++;
- }
- }
- return n;
-}
-
-struct highmem_page {
- char *data;
- struct page *page;
- struct highmem_page *next;
-};
-
-static struct highmem_page *highmem_copy;
-
-static int save_highmem_zone(struct zone *zone)
-{
- unsigned long zone_pfn;
- mark_free_pages(zone);
- for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
- struct page *page;
- struct highmem_page *save;
- void *kaddr;
- unsigned long pfn = zone_pfn + zone->zone_start_pfn;
-
- if (!(pfn%10000))
- printk(".");
- if (!pfn_valid(pfn))
- continue;
- page = pfn_to_page(pfn);
- /*
- * This condition results from rvmalloc() sans vmalloc_32()
- * and architectural memory reservations. This should be
- * corrected eventually when the cases giving rise to this
- * are better understood.
- */
- if (PageReserved(page))
- continue;
- BUG_ON(PageNosave(page));
- if (PageNosaveFree(page))
- continue;
- save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
- if (!save)
- return -ENOMEM;
- save->next = highmem_copy;
- save->page = page;
- save->data = (void *) get_zeroed_page(GFP_ATOMIC);
- if (!save->data) {
- kfree(save);
- return -ENOMEM;
- }
- kaddr = kmap_atomic(page, KM_USER0);
- memcpy(save->data, kaddr, PAGE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- highmem_copy = save;
- }
- return 0;
-}
-
-int save_highmem(void)
-{
- struct zone *zone;
- int res = 0;
-
- pr_debug("swsusp: Saving Highmem");
- drain_local_pages();
- for_each_zone (zone) {
- if (is_highmem(zone))
- res = save_highmem_zone(zone);
- if (res)
- return res;
- }
- printk("\n");
- return 0;
-}
-
-int restore_highmem(void)
-{
- printk("swsusp: Restoring Highmem\n");
- while (highmem_copy) {
- struct highmem_page *save = highmem_copy;
- void *kaddr;
- highmem_copy = save->next;
-
- kaddr = kmap_atomic(save->page, KM_USER0);
- memcpy(kaddr, save->data, PAGE_SIZE);
- kunmap_atomic(kaddr, KM_USER0);
- free_page((long) save->data);
- kfree(save);
- }
- return 0;
-}
-#else
-static inline unsigned int count_highmem_pages(void) {return 0;}
-static inline int save_highmem(void) {return 0;}
-static inline int restore_highmem(void) {return 0;}
-#endif
-
/**
* @safe_needed - on resume, for storing the PBE list and the image,
* we can only use memory pages that do not conflict with the pages
- * used before suspend.
+ * used before suspend. The unsafe pages have PageNosaveFree set
+ * and we count them using unsafe_pages.
*
- * The unsafe pages are marked with the PG_nosave_free flag
- * and we count them using unsafe_pages
+ * Each allocated image page is marked as PageNosave and PageNosaveFree
+ * so that swsusp_free() can release it.
*/
#define PG_ANY 0
@@ -174,7 +61,7 @@ static inline int restore_highmem(void) {return 0;}
static unsigned int allocated_unsafe_pages;
-static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
+static void *get_image_page(gfp_t gfp_mask, int safe_needed)
{
void *res;
@@ -195,20 +82,39 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
unsigned long get_safe_page(gfp_t gfp_mask)
{
- return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE);
+ return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
+}
+
+static struct page *alloc_image_page(gfp_t gfp_mask)
+{
+ struct page *page;
+
+ page = alloc_page(gfp_mask);
+ if (page) {
+ SetPageNosave(page);
+ SetPageNosaveFree(page);
+ }
+ return page;
}
/**
* free_image_page - free page represented by @addr, allocated with
- * alloc_image_page (page flags set by it must be cleared)
+ * get_image_page (page flags set by it must be cleared)
*/
static inline void free_image_page(void *addr, int clear_nosave_free)
{
- ClearPageNosave(virt_to_page(addr));
+ struct page *page;
+
+ BUG_ON(!virt_addr_valid(addr));
+
+ page = virt_to_page(addr);
+
+ ClearPageNosave(page);
if (clear_nosave_free)
- ClearPageNosaveFree(virt_to_page(addr));
- free_page((unsigned long)addr);
+ ClearPageNosaveFree(page);
+
+ __free_page(page);
}
/* struct linked_page is used to build chains of pages */
@@ -269,7 +175,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
struct linked_page *lp;
- lp = alloc_image_page(ca->gfp_mask, ca->safe_needed);
+ lp = get_image_page(ca->gfp_mask, ca->safe_needed);
if (!lp)
return NULL;
@@ -446,8 +352,8 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
/* Compute the number of zones */
nr = 0;
- for_each_zone (zone)
- if (populated_zone(zone) && !is_highmem(zone))
+ for_each_zone(zone)
+ if (populated_zone(zone))
nr++;
/* Allocate the list of zones bitmap objects */
@@ -459,10 +365,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
}
/* Initialize the zone bitmap objects */
- for_each_zone (zone) {
+ for_each_zone(zone) {
unsigned long pfn;
- if (!populated_zone(zone) || is_highmem(zone))
+ if (!populated_zone(zone))
continue;
zone_bm->start_pfn = zone->zone_start_pfn;
@@ -481,7 +387,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
while (bb) {
unsigned long *ptr;
- ptr = alloc_image_page(gfp_mask, safe_needed);
+ ptr = get_image_page(gfp_mask, safe_needed);
bb->data = ptr;
if (!ptr)
goto Free;
@@ -505,7 +411,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
memory_bm_position_reset(bm);
return 0;
-Free:
+ Free:
bm->p_list = ca.chain;
memory_bm_free(bm, PG_UNSAFE_CLEAR);
return -ENOMEM;
@@ -651,7 +557,7 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
memory_bm_position_reset(bm);
return BM_END_OF_MAP;
-Return_pfn:
+ Return_pfn:
bm->cur.chunk = chunk;
bm->cur.bit = bit;
return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
@@ -669,10 +575,82 @@ unsigned int snapshot_additional_pages(struct zone *zone)
res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
- return res;
+ return 2 * res;
+}
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * count_free_highmem_pages - compute the total number of free highmem
+ * pages, system-wide.
+ */
+
+static unsigned int count_free_highmem_pages(void)
+{
+ struct zone *zone;
+ unsigned int cnt = 0;
+
+ for_each_zone(zone)
+ if (populated_zone(zone) && is_highmem(zone))
+ cnt += zone->free_pages;
+
+ return cnt;
+}
+
+/**
+ * saveable_highmem_page - Determine whether a highmem page should be
+ * included in the suspend image.
+ *
+ * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
+ * and it isn't a part of a free chunk of pages.
+ */
+
+static struct page *saveable_highmem_page(unsigned long pfn)
+{
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ return NULL;
+
+ page = pfn_to_page(pfn);
+
+ BUG_ON(!PageHighMem(page));
+
+ if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page))
+ return NULL;
+
+ return page;
}
/**
+ * count_highmem_pages - compute the total number of saveable highmem
+ * pages.
+ */
+
+unsigned int count_highmem_pages(void)
+{
+ struct zone *zone;
+ unsigned int n = 0;
+
+ for_each_zone(zone) {
+ unsigned long pfn, max_zone_pfn;
+
+ if (!is_highmem(zone))
+ continue;
+
+ mark_free_pages(zone);
+ max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (saveable_highmem_page(pfn))
+ n++;
+ }
+ return n;
+}
+#else
+static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
+static inline unsigned int count_highmem_pages(void) { return 0; }
+#endif /* CONFIG_HIGHMEM */
+
+/**
* pfn_is_nosave - check if given pfn is in the 'nosave' section
*/
@@ -684,12 +662,12 @@ static inline int pfn_is_nosave(unsigned long pfn)
}
/**
- * saveable - Determine whether a page should be cloned or not.
- * @pfn: The page
+ * saveable - Determine whether a non-highmem page should be included in
+ * the suspend image.
*
- * We save a page if it isn't Nosave, and is not in the range of pages
- * statically defined as 'unsaveable', and it
- * isn't a part of a free chunk of pages.
+ * We should save the page if it isn't Nosave, and is not in the range
+ * of pages statically defined as 'unsaveable', and it isn't a part of
+ * a free chunk of pages.
*/
static struct page *saveable_page(unsigned long pfn)
@@ -701,76 +679,130 @@ static struct page *saveable_page(unsigned long pfn)
page = pfn_to_page(pfn);
- if (PageNosave(page))
+ BUG_ON(PageHighMem(page));
+
+ if (PageNosave(page) || PageNosaveFree(page))
return NULL;
+
if (PageReserved(page) && pfn_is_nosave(pfn))
return NULL;
- if (PageNosaveFree(page))
- return NULL;
return page;
}
+/**
+ * count_data_pages - compute the total number of saveable non-highmem
+ * pages.
+ */
+
unsigned int count_data_pages(void)
{
struct zone *zone;
unsigned long pfn, max_zone_pfn;
unsigned int n = 0;
- for_each_zone (zone) {
+ for_each_zone(zone) {
if (is_highmem(zone))
continue;
+
mark_free_pages(zone);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
- n += !!saveable_page(pfn);
+ if(saveable_page(pfn))
+ n++;
}
return n;
}
-static inline void copy_data_page(long *dst, long *src)
+/* This is needed, because copy_page and memcpy are not usable for copying
+ * task structs.
+ */
+static inline void do_copy_page(long *dst, long *src)
{
int n;
- /* copy_page and memcpy are not usable for copying task structs. */
for (n = PAGE_SIZE / sizeof(long); n; n--)
*dst++ = *src++;
}
+#ifdef CONFIG_HIGHMEM
+static inline struct page *
+page_is_saveable(struct zone *zone, unsigned long pfn)
+{
+ return is_highmem(zone) ?
+ saveable_highmem_page(pfn) : saveable_page(pfn);
+}
+
+static inline void
+copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+{
+ struct page *s_page, *d_page;
+ void *src, *dst;
+
+ s_page = pfn_to_page(src_pfn);
+ d_page = pfn_to_page(dst_pfn);
+ if (PageHighMem(s_page)) {
+ src = kmap_atomic(s_page, KM_USER0);
+ dst = kmap_atomic(d_page, KM_USER1);
+ do_copy_page(dst, src);
+ kunmap_atomic(src, KM_USER0);
+ kunmap_atomic(dst, KM_USER1);
+ } else {
+ src = page_address(s_page);
+ if (PageHighMem(d_page)) {
+ /* Page pointed to by src may contain some kernel
+ * data modified by kmap_atomic()
+ */
+ do_copy_page(buffer, src);
+ dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
+ memcpy(dst, buffer, PAGE_SIZE);
+ kunmap_atomic(dst, KM_USER0);
+ } else {
+ dst = page_address(d_page);
+ do_copy_page(dst, src);
+ }
+ }
+}
+#else
+#define page_is_saveable(zone, pfn) saveable_page(pfn)
+
+static inline void
+copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+{
+ do_copy_page(page_address(pfn_to_page(dst_pfn)),
+ page_address(pfn_to_page(src_pfn)));
+}
+#endif /* CONFIG_HIGHMEM */
+
static void
copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
{
struct zone *zone;
unsigned long pfn;
- for_each_zone (zone) {
+ for_each_zone(zone) {
unsigned long max_zone_pfn;
- if (is_highmem(zone))
- continue;
-
mark_free_pages(zone);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
- if (saveable_page(pfn))
+ if (page_is_saveable(zone, pfn))
memory_bm_set_bit(orig_bm, pfn);
}
memory_bm_position_reset(orig_bm);
memory_bm_position_reset(copy_bm);
do {
pfn = memory_bm_next_pfn(orig_bm);
- if (likely(pfn != BM_END_OF_MAP)) {
- struct page *page;
- void *src;
-
- page = pfn_to_page(pfn);
- src = page_address(page);
- page = pfn_to_page(memory_bm_next_pfn(copy_bm));
- copy_data_page(page_address(page), src);
- }
+ if (likely(pfn != BM_END_OF_MAP))
+ copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
} while (pfn != BM_END_OF_MAP);
}
+/* Total number of image pages */
+static unsigned int nr_copy_pages;
+/* Number of pages needed for saving the original pfns of the image pages */
+static unsigned int nr_meta_pages;
+
/**
* swsusp_free - free pages allocated for the suspend.
*
@@ -792,7 +824,7 @@ void swsusp_free(void)
if (PageNosave(page) && PageNosaveFree(page)) {
ClearPageNosave(page);
ClearPageNosaveFree(page);
- free_page((long) page_address(page));
+ __free_page(page);
}
}
}
@@ -802,34 +834,108 @@ void swsusp_free(void)
buffer = NULL;
}
+#ifdef CONFIG_HIGHMEM
+/**
+ * count_pages_for_highmem - compute the number of non-highmem pages
+ * that will be necessary for creating copies of highmem pages.
+ */
+
+static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
+{
+ unsigned int free_highmem = count_free_highmem_pages();
+
+ if (free_highmem >= nr_highmem)
+ nr_highmem = 0;
+ else
+ nr_highmem -= free_highmem;
+
+ return nr_highmem;
+}
+#else
+static unsigned int
+count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
+#endif /* CONFIG_HIGHMEM */
/**
- * enough_free_mem - Make sure we enough free memory to snapshot.
- *
- * Returns TRUE or FALSE after checking the number of available
- * free pages.
+ * enough_free_mem - Make sure we have enough free memory for the
+ * snapshot image.
*/
-static int enough_free_mem(unsigned int nr_pages)
+static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
{
struct zone *zone;
unsigned int free = 0, meta = 0;
- for_each_zone (zone)
- if (!is_highmem(zone)) {
+ for_each_zone(zone) {
+ meta += snapshot_additional_pages(zone);
+ if (!is_highmem(zone))
free += zone->free_pages;
- meta += snapshot_additional_pages(zone);
- }
+ }
- pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n",
+ nr_pages += count_pages_for_highmem(nr_highmem);
+ pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n",
nr_pages, PAGES_FOR_IO, meta, free);
return free > nr_pages + PAGES_FOR_IO + meta;
}
+#ifdef CONFIG_HIGHMEM
+/**
+ * get_highmem_buffer - if there are some highmem pages in the suspend
+ * image, we may need the buffer to copy them and/or load their data.
+ */
+
+static inline int get_highmem_buffer(int safe_needed)
+{
+ buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
+ return buffer ? 0 : -ENOMEM;
+}
+
+/**
+ * alloc_highmem_image_pages - allocate some highmem pages for the image.
+ * Try to allocate as many pages as needed, but if the number of free
+ * highmem pages is lesser than that, allocate them all.
+ */
+
+static inline unsigned int
+alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
+{
+ unsigned int to_alloc = count_free_highmem_pages();
+
+ if (to_alloc > nr_highmem)
+ to_alloc = nr_highmem;
+
+ nr_highmem -= to_alloc;
+ while (to_alloc-- > 0) {
+ struct page *page;
+
+ page = alloc_image_page(__GFP_HIGHMEM);
+ memory_bm_set_bit(bm, page_to_pfn(page));
+ }
+ return nr_highmem;
+}
+#else
+static inline int get_highmem_buffer(int safe_needed) { return 0; }
+
+static inline unsigned int
+alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * swsusp_alloc - allocate memory for the suspend image
+ *
+ * We first try to allocate as many highmem pages as there are
+ * saveable highmem pages in the system. If that fails, we allocate
+ * non-highmem pages for the copies of the remaining highmem ones.
+ *
+ * In this approach it is likely that the copies of highmem pages will
+ * also be located in the high memory, because of the way in which
+ * copy_data_pages() works.
+ */
+
static int
swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
- unsigned int nr_pages)
+ unsigned int nr_pages, unsigned int nr_highmem)
{
int error;
@@ -841,46 +947,61 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
if (error)
goto Free;
+ if (nr_highmem > 0) {
+ error = get_highmem_buffer(PG_ANY);
+ if (error)
+ goto Free;
+
+ nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
+ }
while (nr_pages-- > 0) {
- struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
+
if (!page)
goto Free;
- SetPageNosave(page);
- SetPageNosaveFree(page);
memory_bm_set_bit(copy_bm, page_to_pfn(page));
}
return 0;
-Free:
+ Free:
swsusp_free();
return -ENOMEM;
}
-/* Memory bitmap used for marking saveable pages */
+/* Memory bitmap used for marking saveable pages (during suspend) or the
+ * suspend image pages (during resume)
+ */
static struct memory_bitmap orig_bm;
-/* Memory bitmap used for marking allocated pages that will contain the copies
- * of saveable pages
+/* Memory bitmap used on suspend for marking allocated pages that will contain
+ * the copies of saveable pages. During resume it is initially used for
+ * marking the suspend image pages, but then its set bits are duplicated in
+ * @orig_bm and it is released. Next, on systems with high memory, it may be
+ * used for marking "safe" highmem pages, but it has to be reinitialized for
+ * this purpose.
*/
static struct memory_bitmap copy_bm;
asmlinkage int swsusp_save(void)
{
- unsigned int nr_pages;
+ unsigned int nr_pages, nr_highmem;
- pr_debug("swsusp: critical section: \n");
+ printk("swsusp: critical section: \n");
drain_local_pages();
nr_pages = count_data_pages();
- printk("swsusp: Need to copy %u pages\n", nr_pages);
+ nr_highmem = count_highmem_pages();
+ printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem);
- if (!enough_free_mem(nr_pages)) {
+ if (!enough_free_mem(nr_pages, nr_highmem)) {
printk(KERN_ERR "swsusp: Not enough free memory\n");
return -ENOMEM;
}
- if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages))
+ if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
+ printk(KERN_ERR "swsusp: Memory allocation failed\n");
return -ENOMEM;
+ }
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
@@ -894,10 +1015,12 @@ asmlinkage int swsusp_save(void)
* touch swap space! Except we must write out our image of course.
*/
+ nr_pages += nr_highmem;
nr_copy_pages = nr_pages;
- nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
+
return 0;
}
@@ -960,7 +1083,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
if (!buffer) {
/* This makes the buffer be freed by swsusp_free() */
- buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
+ buffer = get_image_page(GFP_ATOMIC, PG_ANY);
if (!buffer)
return -ENOMEM;
}
@@ -975,9 +1098,23 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
memset(buffer, 0, PAGE_SIZE);
pack_pfns(buffer, &orig_bm);
} else {
- unsigned long pfn = memory_bm_next_pfn(&copy_bm);
+ struct page *page;
- handle->buffer = page_address(pfn_to_page(pfn));
+ page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
+ if (PageHighMem(page)) {
+ /* Highmem pages are copied to the buffer,
+ * because we can't return with a kmapped
+ * highmem page (we may not be called again).
+ */
+ void *kaddr;
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ memcpy(buffer, kaddr, PAGE_SIZE);
+ kunmap_atomic(kaddr, KM_USER0);
+ handle->buffer = buffer;
+ } else {
+ handle->buffer = page_address(page);
+ }
}
handle->prev = handle->cur;
}
@@ -1005,7 +1142,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
unsigned long pfn, max_zone_pfn;
/* Clear page flags */
- for_each_zone (zone) {
+ for_each_zone(zone) {
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn))
@@ -1101,6 +1238,218 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
}
}
+/* List of "safe" pages that may be used to store data loaded from the suspend
+ * image
+ */
+static struct linked_page *safe_pages_list;
+
+#ifdef CONFIG_HIGHMEM
+/* struct highmem_pbe is used for creating the list of highmem pages that
+ * should be restored atomically during the resume from disk, because the page
+ * frames they have occupied before the suspend are in use.
+ */
+struct highmem_pbe {
+ struct page *copy_page; /* data is here now */
+ struct page *orig_page; /* data was here before the suspend */
+ struct highmem_pbe *next;
+};
+
+/* List of highmem PBEs needed for restoring the highmem pages that were
+ * allocated before the suspend and included in the suspend image, but have
+ * also been allocated by the "resume" kernel, so their contents cannot be
+ * written directly to their "original" page frames.
+ */
+static struct highmem_pbe *highmem_pblist;
+
+/**
+ * count_highmem_image_pages - compute the number of highmem pages in the
+ * suspend image. The bits in the memory bitmap @bm that correspond to the
+ * image pages are assumed to be set.
+ */
+
+static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
+{
+ unsigned long pfn;
+ unsigned int cnt = 0;
+
+ memory_bm_position_reset(bm);
+ pfn = memory_bm_next_pfn(bm);
+ while (pfn != BM_END_OF_MAP) {
+ if (PageHighMem(pfn_to_page(pfn)))
+ cnt++;
+
+ pfn = memory_bm_next_pfn(bm);
+ }
+ return cnt;
+}
+
+/**
+ * prepare_highmem_image - try to allocate as many highmem pages as
+ * there are highmem image pages (@nr_highmem_p points to the variable
+ * containing the number of highmem image pages). The pages that are
+ * "safe" (ie. will not be overwritten when the suspend image is
+ * restored) have the corresponding bits set in @bm (it must be
+ * unitialized).
+ *
+ * NOTE: This function should not be called if there are no highmem
+ * image pages.
+ */
+
+static unsigned int safe_highmem_pages;
+
+static struct memory_bitmap *safe_highmem_bm;
+
+static int
+prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+{
+ unsigned int to_alloc;
+
+ if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
+ return -ENOMEM;
+
+ if (get_highmem_buffer(PG_SAFE))
+ return -ENOMEM;
+
+ to_alloc = count_free_highmem_pages();
+ if (to_alloc > *nr_highmem_p)
+ to_alloc = *nr_highmem_p;
+ else
+ *nr_highmem_p = to_alloc;
+
+ safe_highmem_pages = 0;
+ while (to_alloc-- > 0) {
+ struct page *page;
+
+ page = alloc_page(__GFP_HIGHMEM);
+ if (!PageNosaveFree(page)) {
+ /* The page is "safe", set its bit the bitmap */
+ memory_bm_set_bit(bm, page_to_pfn(page));
+ safe_highmem_pages++;
+ }
+ /* Mark the page as allocated */
+ SetPageNosave(page);
+ SetPageNosaveFree(page);
+ }
+ memory_bm_position_reset(bm);
+ safe_highmem_bm = bm;
+ return 0;
+}
+
+/**
+ * get_highmem_page_buffer - for given highmem image page find the buffer
+ * that suspend_write_next() should set for its caller to write to.
+ *
+ * If the page is to be saved to its "original" page frame or a copy of
+ * the page is to be made in the highmem, @buffer is returned. Otherwise,
+ * the copy of the page is to be made in normal memory, so the address of
+ * the copy is returned.
+ *
+ * If @buffer is returned, the caller of suspend_write_next() will write
+ * the page's contents to @buffer, so they will have to be copied to the
+ * right location on the next call to suspend_write_next() and it is done
+ * with the help of copy_last_highmem_page(). For this purpose, if
+ * @buffer is returned, @last_highmem page is set to the page to which
+ * the data will have to be copied from @buffer.
+ */
+
+static struct page *last_highmem_page;
+
+static void *
+get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+{
+ struct highmem_pbe *pbe;
+ void *kaddr;
+
+ if (PageNosave(page) && PageNosaveFree(page)) {
+ /* We have allocated the "original" page frame and we can
+ * use it directly to store the loaded page.
+ */
+ last_highmem_page = page;
+ return buffer;
+ }
+ /* The "original" page frame has not been allocated and we have to
+ * use a "safe" page frame to store the loaded page.
+ */
+ pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
+ if (!pbe) {
+ swsusp_free();
+ return NULL;
+ }
+ pbe->orig_page = page;
+ if (safe_highmem_pages > 0) {
+ struct page *tmp;
+
+ /* Copy of the page will be stored in high memory */
+ kaddr = buffer;
+ tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
+ safe_highmem_pages--;
+ last_highmem_page = tmp;
+ pbe->copy_page = tmp;
+ } else {
+ /* Copy of the page will be stored in normal memory */
+ kaddr = safe_pages_list;
+ safe_pages_list = safe_pages_list->next;
+ pbe->copy_page = virt_to_page(kaddr);
+ }
+ pbe->next = highmem_pblist;
+ highmem_pblist = pbe;
+ return kaddr;
+}
+
+/**
+ * copy_last_highmem_page - copy the contents of a highmem image from
+ * @buffer, where the caller of snapshot_write_next() has place them,
+ * to the right location represented by @last_highmem_page .
+ */
+
+static void copy_last_highmem_page(void)
+{
+ if (last_highmem_page) {
+ void *dst;
+
+ dst = kmap_atomic(last_highmem_page, KM_USER0);
+ memcpy(dst, buffer, PAGE_SIZE);
+ kunmap_atomic(dst, KM_USER0);
+ last_highmem_page = NULL;
+ }
+}
+
+static inline int last_highmem_page_copied(void)
+{
+ return !last_highmem_page;
+}
+
+static inline void free_highmem_data(void)
+{
+ if (safe_highmem_bm)
+ memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
+
+ if (buffer)
+ free_image_page(buffer, PG_UNSAFE_CLEAR);
+}
+#else
+static inline int get_safe_write_buffer(void) { return 0; }
+
+static unsigned int
+count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
+
+static inline int
+prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+{
+ return 0;
+}
+
+static inline void *
+get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+{
+ return NULL;
+}
+
+static inline void copy_last_highmem_page(void) {}
+static inline int last_highmem_page_copied(void) { return 1; }
+static inline void free_highmem_data(void) {}
+#endif /* CONFIG_HIGHMEM */
+
/**
* prepare_image - use the memory bitmap @bm to mark the pages that will
* be overwritten in the process of restoring the system memory state
@@ -1110,20 +1459,25 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
* The idea is to allocate a new memory bitmap first and then allocate
* as many pages as needed for the image data, but not to assign these
* pages to specific tasks initially. Instead, we just mark them as
- * allocated and create a list of "safe" pages that will be used later.
+ * allocated and create a lists of "safe" pages that will be used
+ * later. On systems with high memory a list of "safe" highmem pages is
+ * also created.
*/
#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
-static struct linked_page *safe_pages_list;
-
static int
prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
{
- unsigned int nr_pages;
+ unsigned int nr_pages, nr_highmem;
struct linked_page *sp_list, *lp;
int error;
+ /* If there is no highmem, the buffer will not be necessary */
+ free_image_page(buffer, PG_UNSAFE_CLEAR);
+ buffer = NULL;
+
+ nr_highmem = count_highmem_image_pages(bm);
error = mark_unsafe_pages(bm);
if (error)
goto Free;
@@ -1134,6 +1488,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
duplicate_memory_bitmap(new_bm, bm);
memory_bm_free(bm, PG_UNSAFE_KEEP);
+ if (nr_highmem > 0) {
+ error = prepare_highmem_image(bm, &nr_highmem);
+ if (error)
+ goto Free;
+ }
/* Reserve some safe pages for potential later use.
*
* NOTE: This way we make sure there will be enough safe pages for the
@@ -1142,10 +1501,10 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
*/
sp_list = NULL;
/* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
- nr_pages = nr_copy_pages - allocated_unsafe_pages;
+ nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
while (nr_pages > 0) {
- lp = alloc_image_page(GFP_ATOMIC, PG_SAFE);
+ lp = get_image_page(GFP_ATOMIC, PG_SAFE);
if (!lp) {
error = -ENOMEM;
goto Free;
@@ -1156,7 +1515,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
}
/* Preallocate memory for the image */
safe_pages_list = NULL;
- nr_pages = nr_copy_pages - allocated_unsafe_pages;
+ nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
while (nr_pages > 0) {
lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
if (!lp) {
@@ -1181,7 +1540,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
}
return 0;
-Free:
+ Free:
swsusp_free();
return error;
}
@@ -1196,6 +1555,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
struct pbe *pbe;
struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
+ if (PageHighMem(page))
+ return get_highmem_page_buffer(page, ca);
+
if (PageNosave(page) && PageNosaveFree(page))
/* We have allocated the "original" page frame and we can
* use it directly to store the loaded page.
@@ -1210,12 +1572,12 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
swsusp_free();
return NULL;
}
- pbe->orig_address = (unsigned long)page_address(page);
- pbe->address = (unsigned long)safe_pages_list;
+ pbe->orig_address = page_address(page);
+ pbe->address = safe_pages_list;
safe_pages_list = safe_pages_list->next;
pbe->next = restore_pblist;
restore_pblist = pbe;
- return (void *)pbe->address;
+ return pbe->address;
}
/**
@@ -1249,14 +1611,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
return 0;
- if (!buffer) {
- /* This makes the buffer be freed by swsusp_free() */
- buffer = alloc_image_page(GFP_ATOMIC, PG_ANY);
+ if (handle->offset == 0) {
+ if (!buffer)
+ /* This makes the buffer be freed by swsusp_free() */
+ buffer = get_image_page(GFP_ATOMIC, PG_ANY);
+
if (!buffer)
return -ENOMEM;
- }
- if (!handle->offset)
+
handle->buffer = buffer;
+ }
handle->sync_read = 1;
if (handle->prev < handle->cur) {
if (handle->prev == 0) {
@@ -1284,8 +1648,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
return -ENOMEM;
}
} else {
+ copy_last_highmem_page();
handle->buffer = get_buffer(&orig_bm, &ca);
- handle->sync_read = 0;
+ if (handle->buffer != buffer)
+ handle->sync_read = 0;
}
handle->prev = handle->cur;
}
@@ -1301,15 +1667,73 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
return count;
}
+/**
+ * snapshot_write_finalize - must be called after the last call to
+ * snapshot_write_next() in case the last page in the image happens
+ * to be a highmem page and its contents should be stored in the
+ * highmem. Additionally, it releases the memory that will not be
+ * used any more.
+ */
+
+void snapshot_write_finalize(struct snapshot_handle *handle)
+{
+ copy_last_highmem_page();
+ /* Free only if we have loaded the image entirely */
+ if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
+ memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+ free_highmem_data();
+ }
+}
+
int snapshot_image_loaded(struct snapshot_handle *handle)
{
- return !(!nr_copy_pages ||
+ return !(!nr_copy_pages || !last_highmem_page_copied() ||
handle->cur <= nr_meta_pages + nr_copy_pages);
}
-void snapshot_free_unused_memory(struct snapshot_handle *handle)
+#ifdef CONFIG_HIGHMEM
+/* Assumes that @buf is ready and points to a "safe" page */
+static inline void
+swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
{
- /* Free only if we have loaded the image entirely */
- if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
- memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+ void *kaddr1, *kaddr2;
+
+ kaddr1 = kmap_atomic(p1, KM_USER0);
+ kaddr2 = kmap_atomic(p2, KM_USER1);
+ memcpy(buf, kaddr1, PAGE_SIZE);
+ memcpy(kaddr1, kaddr2, PAGE_SIZE);
+ memcpy(kaddr2, buf, PAGE_SIZE);
+ kunmap_atomic(kaddr1, KM_USER0);
+ kunmap_atomic(kaddr2, KM_USER1);
+}
+
+/**
+ * restore_highmem - for each highmem page that was allocated before
+ * the suspend and included in the suspend image, and also has been
+ * allocated by the "resume" kernel swap its current (ie. "before
+ * resume") contents with the previous (ie. "before suspend") one.
+ *
+ * If the resume eventually fails, we can call this function once
+ * again and restore the "before resume" highmem state.
+ */
+
+int restore_highmem(void)
+{
+ struct highmem_pbe *pbe = highmem_pblist;
+ void *buf;
+
+ if (!pbe)
+ return 0;
+
+ buf = get_image_page(GFP_ATOMIC, PG_SAFE);
+ if (!buf)
+ return -ENOMEM;
+
+ while (pbe) {
+ swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
+ pbe = pbe->next;
+ }
+ free_image_page(buf, PG_UNSAFE_CLEAR);
+ return 0;
}
+#endif /* CONFIG_HIGHMEM */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1a3b0dd2c3fc..f133d4a6d817 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -34,34 +34,123 @@ extern char resume_file[];
#define SWSUSP_SIG "S1SUSPEND"
static struct swsusp_header {
- char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
- swp_entry_t image;
+ char reserved[PAGE_SIZE - 20 - sizeof(sector_t)];
+ sector_t image;
char orig_sig[10];
char sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
/*
- * Saving part...
+ * General things
*/
static unsigned short root_swap = 0xffff;
+static struct block_device *resume_bdev;
+
+/**
+ * submit - submit BIO request.
+ * @rw: READ or WRITE.
+ * @off physical offset of page.
+ * @page: page we're reading or writing.
+ * @bio_chain: list of pending biod (for async reading)
+ *
+ * Straight from the textbook - allocate and initialize the bio.
+ * If we're reading, make sure the page is marked as dirty.
+ * Then submit it and, if @bio_chain == NULL, wait.
+ */
+static int submit(int rw, pgoff_t page_off, struct page *page,
+ struct bio **bio_chain)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+ if (!bio)
+ return -ENOMEM;
+ bio->bi_sector = page_off * (PAGE_SIZE >> 9);
+ bio->bi_bdev = resume_bdev;
+ bio->bi_end_io = end_swap_bio_read;
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ lock_page(page);
+ bio_get(bio);
+
+ if (bio_chain == NULL) {
+ submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+ wait_on_page_locked(page);
+ if (rw == READ)
+ bio_set_pages_dirty(bio);
+ bio_put(bio);
+ } else {
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
+ bio->bi_private = *bio_chain;
+ *bio_chain = bio;
+ submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+ }
+ return 0;
+}
+
+static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(READ, page_off, virt_to_page(addr), bio_chain);
+}
+
+static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
+}
+
+static int wait_on_bio_chain(struct bio **bio_chain)
+{
+ struct bio *bio;
+ struct bio *next_bio;
+ int ret = 0;
+
+ if (bio_chain == NULL)
+ return 0;
+
+ bio = *bio_chain;
+ if (bio == NULL)
+ return 0;
+ while (bio) {
+ struct page *page;
+
+ next_bio = bio->bi_private;
+ page = bio->bi_io_vec[0].bv_page;
+ wait_on_page_locked(page);
+ if (!PageUptodate(page) || PageError(page))
+ ret = -EIO;
+ put_page(page);
+ bio_put(bio);
+ bio = next_bio;
+ }
+ *bio_chain = NULL;
+ return ret;
+}
+
+/*
+ * Saving part
+ */
-static int mark_swapfiles(swp_entry_t start)
+static int mark_swapfiles(sector_t start)
{
int error;
- rw_swap_page_sync(READ, swp_entry(root_swap, 0),
- virt_to_page((unsigned long)&swsusp_header), NULL);
+ bio_read_page(swsusp_resume_block, &swsusp_header, NULL);
if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
swsusp_header.image = start;
- error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0),
- virt_to_page((unsigned long)&swsusp_header),
- NULL);
+ error = bio_write_page(swsusp_resume_block,
+ &swsusp_header, NULL);
} else {
- pr_debug("swsusp: Partition is not swap space.\n");
+ printk(KERN_ERR "swsusp: Swap header not found!\n");
error = -ENODEV;
}
return error;
@@ -74,12 +163,21 @@ static int mark_swapfiles(swp_entry_t start)
static int swsusp_swap_check(void) /* This is called before saving image */
{
- int res = swap_type_of(swsusp_resume_device);
+ int res;
+
+ res = swap_type_of(swsusp_resume_device, swsusp_resume_block);
+ if (res < 0)
+ return res;
+
+ root_swap = res;
+ resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE);
+ if (IS_ERR(resume_bdev))
+ return PTR_ERR(resume_bdev);
+
+ res = set_blocksize(resume_bdev, PAGE_SIZE);
+ if (res < 0)
+ blkdev_put(resume_bdev);
- if (res >= 0) {
- root_swap = res;
- return 0;
- }
return res;
}
@@ -90,36 +188,26 @@ static int swsusp_swap_check(void) /* This is called before saving image */
* @bio_chain: Link the next write BIO here
*/
-static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
+static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
- swp_entry_t entry;
- int error = -ENOSPC;
-
- if (offset) {
- struct page *page = virt_to_page(buf);
-
- if (bio_chain) {
- /*
- * Whether or not we successfully allocated a copy page,
- * we take a ref on the page here. It gets undone in
- * wait_on_bio_chain().
- */
- struct page *page_copy;
- page_copy = alloc_page(GFP_ATOMIC);
- if (page_copy == NULL) {
- WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
- get_page(page);
- } else {
- memcpy(page_address(page_copy),
- page_address(page), PAGE_SIZE);
- page = page_copy;
- }
+ void *src;
+
+ if (!offset)
+ return -ENOSPC;
+
+ if (bio_chain) {
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (src) {
+ memcpy(src, buf, PAGE_SIZE);
+ } else {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ src = buf;
}
- entry = swp_entry(root_swap, offset);
- error = rw_swap_page_sync(WRITE, entry, page, bio_chain);
+ } else {
+ src = buf;
}
- return error;
+ return bio_write_page(offset, src, bio_chain);
}
/*
@@ -137,11 +225,11 @@ static int write_page(void *buf, unsigned long offset, struct bio **bio_chain)
* at a time.
*/
-#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(long) - 1)
+#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
struct swap_map_page {
- unsigned long entries[MAP_PAGE_ENTRIES];
- unsigned long next_swap;
+ sector_t entries[MAP_PAGE_ENTRIES];
+ sector_t next_swap;
};
/**
@@ -151,7 +239,7 @@ struct swap_map_page {
struct swap_map_handle {
struct swap_map_page *cur;
- unsigned long cur_swap;
+ sector_t cur_swap;
struct bitmap_page *bitmap;
unsigned int k;
};
@@ -166,26 +254,6 @@ static void release_swap_writer(struct swap_map_handle *handle)
handle->bitmap = NULL;
}
-static void show_speed(struct timeval *start, struct timeval *stop,
- unsigned nr_pages, char *msg)
-{
- s64 elapsed_centisecs64;
- int centisecs;
- int k;
- int kps;
-
- elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
- do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
- centisecs = elapsed_centisecs64;
- if (centisecs == 0)
- centisecs = 1; /* avoid div-by-zero */
- k = nr_pages * (PAGE_SIZE / 1024);
- kps = (k * 100) / centisecs;
- printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
- centisecs / 100, centisecs % 100,
- kps / 1000, (kps % 1000) / 10);
-}
-
static int get_swap_writer(struct swap_map_handle *handle)
{
handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
@@ -196,7 +264,7 @@ static int get_swap_writer(struct swap_map_handle *handle)
release_swap_writer(handle);
return -ENOMEM;
}
- handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap);
+ handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap);
if (!handle->cur_swap) {
release_swap_writer(handle);
return -ENOSPC;
@@ -205,43 +273,15 @@ static int get_swap_writer(struct swap_map_handle *handle)
return 0;
}
-static int wait_on_bio_chain(struct bio **bio_chain)
-{
- struct bio *bio;
- struct bio *next_bio;
- int ret = 0;
-
- if (bio_chain == NULL)
- return 0;
-
- bio = *bio_chain;
- if (bio == NULL)
- return 0;
- while (bio) {
- struct page *page;
-
- next_bio = bio->bi_private;
- page = bio->bi_io_vec[0].bv_page;
- wait_on_page_locked(page);
- if (!PageUptodate(page) || PageError(page))
- ret = -EIO;
- put_page(page);
- bio_put(bio);
- bio = next_bio;
- }
- *bio_chain = NULL;
- return ret;
-}
-
static int swap_write_page(struct swap_map_handle *handle, void *buf,
struct bio **bio_chain)
{
int error = 0;
- unsigned long offset;
+ sector_t offset;
if (!handle->cur)
return -EINVAL;
- offset = alloc_swap_page(root_swap, handle->bitmap);
+ offset = alloc_swapdev_block(root_swap, handle->bitmap);
error = write_page(buf, offset, bio_chain);
if (error)
return error;
@@ -250,7 +290,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
error = wait_on_bio_chain(bio_chain);
if (error)
goto out;
- offset = alloc_swap_page(root_swap, handle->bitmap);
+ offset = alloc_swapdev_block(root_swap, handle->bitmap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
@@ -261,7 +301,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
handle->cur_swap = offset;
handle->k = 0;
}
-out:
+ out:
return error;
}
@@ -315,7 +355,7 @@ static int save_image(struct swap_map_handle *handle,
error = err2;
if (!error)
printk("\b\b\b\bdone\n");
- show_speed(&start, &stop, nr_to_write, "Wrote");
+ swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
return error;
}
@@ -350,100 +390,50 @@ int swsusp_write(void)
struct swsusp_info *header;
int error;
- if ((error = swsusp_swap_check())) {
+ error = swsusp_swap_check();
+ if (error) {
printk(KERN_ERR "swsusp: Cannot find swap device, try "
"swapon -a.\n");
return error;
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_read_next(&snapshot, PAGE_SIZE);
- if (error < PAGE_SIZE)
- return error < 0 ? error : -EFAULT;
+ if (error < PAGE_SIZE) {
+ if (error >= 0)
+ error = -EFAULT;
+
+ goto out;
+ }
header = (struct swsusp_info *)data_of(snapshot);
if (!enough_swap(header->pages)) {
printk(KERN_ERR "swsusp: Not enough free swap\n");
- return -ENOSPC;
+ error = -ENOSPC;
+ goto out;
}
error = get_swap_writer(&handle);
if (!error) {
- unsigned long start = handle.cur_swap;
+ sector_t start = handle.cur_swap;
+
error = swap_write_page(&handle, header, NULL);
if (!error)
error = save_image(&handle, &snapshot,
header->pages - 1);
+
if (!error) {
flush_swap_writer(&handle);
printk("S");
- error = mark_swapfiles(swp_entry(root_swap, start));
+ error = mark_swapfiles(start);
printk("|\n");
}
}
if (error)
free_all_swap_pages(root_swap, handle.bitmap);
release_swap_writer(&handle);
+ out:
+ swsusp_close();
return error;
}
-static struct block_device *resume_bdev;
-
-/**
- * submit - submit BIO request.
- * @rw: READ or WRITE.
- * @off physical offset of page.
- * @page: page we're reading or writing.
- * @bio_chain: list of pending biod (for async reading)
- *
- * Straight from the textbook - allocate and initialize the bio.
- * If we're reading, make sure the page is marked as dirty.
- * Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, pgoff_t page_off, struct page *page,
- struct bio **bio_chain)
-{
- struct bio *bio;
-
- bio = bio_alloc(GFP_ATOMIC, 1);
- if (!bio)
- return -ENOMEM;
- bio->bi_sector = page_off * (PAGE_SIZE >> 9);
- bio->bi_bdev = resume_bdev;
- bio->bi_end_io = end_swap_bio_read;
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- printk("swsusp: ERROR: adding page to bio at %ld\n", page_off);
- bio_put(bio);
- return -EFAULT;
- }
-
- lock_page(page);
- bio_get(bio);
-
- if (bio_chain == NULL) {
- submit_bio(rw | (1 << BIO_RW_SYNC), bio);
- wait_on_page_locked(page);
- if (rw == READ)
- bio_set_pages_dirty(bio);
- bio_put(bio);
- } else {
- if (rw == READ)
- get_page(page); /* These pages are freed later */
- bio->bi_private = *bio_chain;
- *bio_chain = bio;
- submit_bio(rw | (1 << BIO_RW_SYNC), bio);
- }
- return 0;
-}
-
-static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(READ, page_off, virt_to_page(addr), bio_chain);
-}
-
-static int bio_write_page(pgoff_t page_off, void *addr)
-{
- return submit(WRITE, page_off, virt_to_page(addr), NULL);
-}
-
/**
* The following functions allow us to read data using a swap map
* in a file-alike way
@@ -456,17 +446,18 @@ static void release_swap_reader(struct swap_map_handle *handle)
handle->cur = NULL;
}
-static int get_swap_reader(struct swap_map_handle *handle,
- swp_entry_t start)
+static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
{
int error;
- if (!swp_offset(start))
+ if (!start)
return -EINVAL;
- handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+
+ handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
if (!handle->cur)
return -ENOMEM;
- error = bio_read_page(swp_offset(start), handle->cur, NULL);
+
+ error = bio_read_page(start, handle->cur, NULL);
if (error) {
release_swap_reader(handle);
return error;
@@ -478,7 +469,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
static int swap_read_page(struct swap_map_handle *handle, void *buf,
struct bio **bio_chain)
{
- unsigned long offset;
+ sector_t offset;
int error;
if (!handle->cur)
@@ -547,11 +538,11 @@ static int load_image(struct swap_map_handle *handle,
error = err2;
if (!error) {
printk("\b\b\b\bdone\n");
- snapshot_free_unused_memory(snapshot);
+ snapshot_write_finalize(snapshot);
if (!snapshot_image_loaded(snapshot))
error = -ENODATA;
}
- show_speed(&start, &stop, nr_to_read, "Read");
+ swsusp_show_speed(&start, &stop, nr_to_read, "Read");
return error;
}
@@ -600,12 +591,16 @@ int swsusp_check(void)
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
memset(&swsusp_header, 0, sizeof(swsusp_header));
- if ((error = bio_read_page(0, &swsusp_header, NULL)))
+ error = bio_read_page(swsusp_resume_block,
+ &swsusp_header, NULL);
+ if (error)
return error;
+
if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
/* Reset swap signature now */
- error = bio_write_page(0, &swsusp_header);
+ error = bio_write_page(swsusp_resume_block,
+ &swsusp_header, NULL);
} else {
return -EINVAL;
}
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 0b66659dc516..31aa0390c777 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -49,6 +49,7 @@
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/highmem.h>
+#include <linux/time.h>
#include "power.h"
@@ -64,10 +65,8 @@ int in_suspend __nosavedata = 0;
#ifdef CONFIG_HIGHMEM
unsigned int count_highmem_pages(void);
-int save_highmem(void);
int restore_highmem(void);
#else
-static inline int save_highmem(void) { return 0; }
static inline int restore_highmem(void) { return 0; }
static inline unsigned int count_highmem_pages(void) { return 0; }
#endif
@@ -134,18 +133,18 @@ static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit)
return 0;
}
-unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap)
+sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap)
{
unsigned long offset;
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
- if (bitmap_set(bitmap, offset)) {
+ if (bitmap_set(bitmap, offset))
swap_free(swp_entry(swap, offset));
- offset = 0;
- }
+ else
+ return swapdev_block(swap, offset);
}
- return offset;
+ return 0;
}
void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
@@ -166,6 +165,34 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
}
/**
+ * swsusp_show_speed - print the time elapsed between two events represented by
+ * @start and @stop
+ *
+ * @nr_pages - number of pages processed between @start and @stop
+ * @msg - introductory message to print
+ */
+
+void swsusp_show_speed(struct timeval *start, struct timeval *stop,
+ unsigned nr_pages, char *msg)
+{
+ s64 elapsed_centisecs64;
+ int centisecs;
+ int k;
+ int kps;
+
+ elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
+ do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
+ centisecs = elapsed_centisecs64;
+ if (centisecs == 0)
+ centisecs = 1; /* avoid div-by-zero */
+ k = nr_pages * (PAGE_SIZE / 1024);
+ kps = (k * 100) / centisecs;
+ printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
+ centisecs / 100, centisecs % 100,
+ kps / 1000, (kps % 1000) / 10);
+}
+
+/**
* swsusp_shrink_memory - Try to free as much memory as needed
*
* ... but do not OOM-kill anyone
@@ -184,23 +211,37 @@ static inline unsigned long __shrink_memory(long tmp)
int swsusp_shrink_memory(void)
{
- long size, tmp;
+ long tmp;
struct zone *zone;
unsigned long pages = 0;
unsigned int i = 0;
char *p = "-\\|/";
+ struct timeval start, stop;
printk("Shrinking memory... ");
+ do_gettimeofday(&start);
do {
- size = 2 * count_highmem_pages();
- size += size / 50 + count_data_pages() + PAGES_FOR_IO;
+ long size, highmem_size;
+
+ highmem_size = count_highmem_pages();
+ size = count_data_pages() + PAGES_FOR_IO;
tmp = size;
+ size += highmem_size;
for_each_zone (zone)
- if (!is_highmem(zone) && populated_zone(zone)) {
- tmp -= zone->free_pages;
- tmp += zone->lowmem_reserve[ZONE_NORMAL];
- tmp += snapshot_additional_pages(zone);
+ if (populated_zone(zone)) {
+ if (is_highmem(zone)) {
+ highmem_size -= zone->free_pages;
+ } else {
+ tmp -= zone->free_pages;
+ tmp += zone->lowmem_reserve[ZONE_NORMAL];
+ tmp += snapshot_additional_pages(zone);
+ }
}
+
+ if (highmem_size < 0)
+ highmem_size = 0;
+
+ tmp += highmem_size;
if (tmp > 0) {
tmp = __shrink_memory(tmp);
if (!tmp)
@@ -212,7 +253,9 @@ int swsusp_shrink_memory(void)
}
printk("\b%c", p[i++%4]);
} while (tmp > 0);
+ do_gettimeofday(&stop);
printk("\bdone (%lu pages freed)\n", pages);
+ swsusp_show_speed(&start, &stop, pages, "Freed");
return 0;
}
@@ -223,6 +266,7 @@ int swsusp_suspend(void)
if ((error = arch_prepare_suspend()))
return error;
+
local_irq_disable();
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* device_power_down() now.
@@ -235,23 +279,16 @@ int swsusp_suspend(void)
goto Enable_irqs;
}
- if ((error = save_highmem())) {
- printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
- goto Restore_highmem;
- }
-
save_processor_state();
if ((error = swsusp_arch_suspend()))
printk(KERN_ERR "Error %d suspending\n", error);
/* Restore control flow magically appears here */
restore_processor_state();
-Restore_highmem:
- restore_highmem();
/* NOTE: device_power_up() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
*/
device_power_up();
-Enable_irqs:
+ Enable_irqs:
local_irq_enable();
return error;
}
@@ -268,18 +305,23 @@ int swsusp_resume(void)
printk(KERN_ERR "Some devices failed to power down, very bad\n");
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
- error = swsusp_arch_resume();
- /* Code below is only ever reached in case of failure. Otherwise
- * execution continues at place where swsusp_arch_suspend was called
- */
- BUG_ON(!error);
+ error = restore_highmem();
+ if (!error) {
+ error = swsusp_arch_resume();
+ /* The code below is only ever reached in case of a failure.
+ * Otherwise execution continues at place where
+ * swsusp_arch_suspend() was called
+ */
+ BUG_ON(!error);
+ /* This call to restore_highmem() undos the previous one */
+ restore_highmem();
+ }
/* The only reason why swsusp_arch_resume() can fail is memory being
* very tight, so we have to free it as soon as we can to avoid
* subsequent failures
*/
swsusp_free();
restore_processor_state();
- restore_highmem();
touch_softlockup_watchdog();
device_power_up();
local_irq_enable();
diff --git a/kernel/power/user.c b/kernel/power/user.c
index d991d3b0e5a4..89443b85163b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -11,6 +11,7 @@
#include <linux/suspend.h>
#include <linux/syscalls.h>
+#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
@@ -21,6 +22,7 @@
#include <linux/fs.h>
#include <linux/console.h>
#include <linux/cpu.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
@@ -54,7 +56,8 @@ static int snapshot_open(struct inode *inode, struct file *filp)
filp->private_data = data;
memset(&data->handle, 0, sizeof(struct snapshot_handle));
if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
- data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device) : -1;
+ data->swap = swsusp_resume_device ?
+ swap_type_of(swsusp_resume_device, 0) : -1;
data->mode = O_RDONLY;
} else {
data->swap = -1;
@@ -76,10 +79,10 @@ static int snapshot_release(struct inode *inode, struct file *filp)
free_all_swap_pages(data->swap, data->bitmap);
free_bitmap(data->bitmap);
if (data->frozen) {
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
thaw_processes();
enable_nonboot_cpus();
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
}
atomic_inc(&device_available);
return 0;
@@ -124,7 +127,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
{
int error = 0;
struct snapshot_data *data;
- loff_t offset, avail;
+ loff_t avail;
+ sector_t offset;
if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
return -ENOTTY;
@@ -140,7 +144,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
case SNAPSHOT_FREEZE:
if (data->frozen)
break;
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
error = disable_nonboot_cpus();
if (!error) {
error = freeze_processes();
@@ -150,7 +154,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -EBUSY;
}
}
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
if (!error)
data->frozen = 1;
break;
@@ -158,10 +162,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
case SNAPSHOT_UNFREEZE:
if (!data->frozen)
break;
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
thaw_processes();
enable_nonboot_cpus();
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
data->frozen = 0;
break;
@@ -170,7 +174,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
error = -EPERM;
break;
}
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (!error) {
@@ -183,7 +187,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
}
resume_console();
}
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
if (!error)
error = put_user(in_suspend, (unsigned int __user *)arg);
if (!error)
@@ -191,13 +195,13 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
break;
case SNAPSHOT_ATOMIC_RESTORE:
+ snapshot_write_finalize(&data->handle);
if (data->mode != O_WRONLY || !data->frozen ||
!snapshot_image_loaded(&data->handle)) {
error = -EPERM;
break;
}
- snapshot_free_unused_memory(&data->handle);
- down(&pm_sem);
+ mutex_lock(&pm_mutex);
pm_prepare_console();
suspend_console();
error = device_suspend(PMSG_PRETHAW);
@@ -207,7 +211,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
}
resume_console();
pm_restore_console();
- up(&pm_sem);
+ mutex_unlock(&pm_mutex);
break;
case SNAPSHOT_FREE:
@@ -238,10 +242,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
break;
}
}
- offset = alloc_swap_page(data->swap, data->bitmap);
+ offset = alloc_swapdev_block(data->swap, data->bitmap);
if (offset) {
offset <<= PAGE_SHIFT;
- error = put_user(offset, (loff_t __user *)arg);
+ error = put_user(offset, (sector_t __user *)arg);
} else {
error = -ENOSPC;
}
@@ -264,7 +268,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
* so we need to recode them
*/
if (old_decode_dev(arg)) {
- data->swap = swap_type_of(old_decode_dev(arg));
+ data->swap = swap_type_of(old_decode_dev(arg), 0);
if (data->swap < 0)
error = -ENODEV;
} else {
@@ -282,7 +286,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
break;
}
- if (down_trylock(&pm_sem)) {
+ if (!mutex_trylock(&pm_mutex)) {
error = -EBUSY;
break;
}
@@ -309,8 +313,66 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
if (pm_ops->finish)
pm_ops->finish(PM_SUSPEND_MEM);
-OutS3:
- up(&pm_sem);
+ OutS3:
+ mutex_unlock(&pm_mutex);
+ break;
+
+ case SNAPSHOT_PMOPS:
+ switch (arg) {
+
+ case PMOPS_PREPARE:
+ if (pm_ops->prepare) {
+ error = pm_ops->prepare(PM_SUSPEND_DISK);
+ }
+ break;
+
+ case PMOPS_ENTER:
+ kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
+ error = pm_ops->enter(PM_SUSPEND_DISK);
+ break;
+
+ case PMOPS_FINISH:
+ if (pm_ops && pm_ops->finish) {
+ pm_ops->finish(PM_SUSPEND_DISK);
+ }
+ break;
+
+ default:
+ printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
+ error = -EINVAL;
+
+ }
+ break;
+
+ case SNAPSHOT_SET_SWAP_AREA:
+ if (data->bitmap) {
+ error = -EPERM;
+ } else {
+ struct resume_swap_area swap_area;
+ dev_t swdev;
+
+ error = copy_from_user(&swap_area, (void __user *)arg,
+ sizeof(struct resume_swap_area));
+ if (error) {
+ error = -EFAULT;
+ break;
+ }
+
+ /*
+ * User space encodes device types as two-byte values,
+ * so we need to recode them
+ */
+ swdev = old_decode_dev(swap_area.dev);
+ if (swdev) {
+ offset = swap_area.offset;
+ data->swap = swap_type_of(swdev, offset);
+ if (data->swap < 0)
+ error = -ENODEV;
+ } else {
+ data->swap = -1;
+ error = -EINVAL;
+ }
+ }
break;
default:
@@ -321,7 +383,7 @@ OutS3:
return error;
}
-static struct file_operations snapshot_fops = {
+static const struct file_operations snapshot_fops = {
.open = snapshot_open,
.release = snapshot_release,
.read = snapshot_read,